In [17]:
import os

import cx_Oracle
import pandas as pd

In [18]:
def extract_oltp(table_names):
    """Load tables of the "SSP_RES" schema into module-level DataFrames.

    For every name in ``table_names`` the function runs
    ``select * from "SSP_RES".<name>``, builds a pandas DataFrame whose
    column labels come from the cursor description, and stores it in
    ``globals()`` under the key ``df_<name>``. One confirmation line is
    printed per loaded table.

    Any exception raised while connecting, querying or building a frame is
    caught and printed; the loop stops at the first failure and frames loaded
    before it stay available. The cursor and the connection are closed in
    every case.

    Parameters
    ----------
    table_names : iterable of str
        Table names. Each name is interpolated verbatim into the SQL text,
        so only pass trusted identifiers.

    Returns
    -------
    None
        Results are published through ``globals()`` only.
    """
    # NOTE(review): credentials are hard-coded here; consider reading them
    # from the environment or a secrets store before sharing the notebook.
    conStr = 'system/oracle@localhost:1521/xe'

    # Initialise both handles so the cleanup below can tell what was opened.
    conn = None
    cur = None

    try:
        conn = cx_Oracle.connect(conStr)
        cur = conn.cursor()

        for table_name in table_names:
            # The table name cannot be sent as a bind variable, hence the f-string.
            sqlTxt = f'select * from "SSP_RES".{table_name}'

            cur.execute(sqlTxt)
            record = cur.fetchall()

            # First element of each description entry is the column name.
            df = pd.DataFrame.from_records(record, columns=[x[0] for x in cur.description])

            # Publish the frame as a global so later cells can use df_<table>.
            variable = f'df_{table_name}'
            globals()[variable] = df

            print(f"{variable} succesfully loaded!")

    except Exception as err:
        print('Error while connecting to the db')
        print(err)

    finally:
        # The cursor may never have been created (e.g. conn.cursor() failed),
        # so guard it separately; the nested finally guarantees the connection
        # is closed even if closing the cursor itself raises.
        try:
            if cur is not None:
                cur.close()
        finally:
            if conn is not None:
                conn.close()

In [19]:
# Source tables to extract; each entry becomes one DataFrame.
table_names = [
    "SRC_Country",
    "SRC_City",
    "SRC_Guest",
    "SRC_Building",
    "SRC_Apartment_Type",
    "SRC_Apartment",
    "SRC_Reservation",
    "SRC_Content",
    "SRC_Contains",
    "SRC_Apartment_Ocupancy",
    "SRC_Pricelist",
]

In [20]:
# Load every table listed in table_names; each one is stored as a global
# DataFrame named df_<table name>, which the cells below rely on.
extract_oltp(table_names)

df_SRC_Country succesfully loaded!
df_SRC_City succesfully loaded!
df_SRC_Guest succesfully loaded!
df_SRC_Building succesfully loaded!
df_SRC_Apartment_Type succesfully loaded!
df_SRC_Apartment succesfully loaded!
df_SRC_Reservation succesfully loaded!
df_SRC_Content succesfully loaded!
df_SRC_Contains succesfully loaded!
df_SRC_Apartment_Ocupancy succesfully loaded!
df_SRC_Pricelist succesfully loaded!


In [21]:
# Build the guest dimension: left-join the country name onto each guest,
# combine first and last name into one column, then keep only the id,
# the country (as "Origin") and the full name (as "Guest name").
df_guest_country = (
    df_SRC_Guest[["GUEST_ID", "GUEST_FIRSTNAME", "GUEST_LASTNAME", "COUNTRY_ID"]]
    .merge(df_SRC_Country[["COUNTRY_ID", "COUNTRY_NAME"]], on="COUNTRY_ID", how="left")
    .assign(GUEST_NAME=lambda guests: guests["GUEST_FIRSTNAME"] + " " + guests["GUEST_LASTNAME"])
    .drop(columns=["COUNTRY_ID", "GUEST_FIRSTNAME", "GUEST_LASTNAME"])
    .rename(columns={"GUEST_NAME": "Guest name", "COUNTRY_NAME": "Origin"})
)

df_guest_country.head()

Unnamed: 0,GUEST_ID,Origin,Guest name


In [6]:
# Keep only the content rows whose name is exactly "beds".
df_SRC_Content_Beds = df_SRC_Content[df_SRC_Content["CONTENT_NAME"] == "beds"]

# Inner join: only "contains" rows that refer to the beds content survive.
df_contains_content = df_SRC_Contains.merge(df_SRC_Content_Beds, on="CONTENT_ID", how="inner")

# Add the apartment name, drop the content bookkeeping columns, relabel for
# the report and fix the final column order.
df_apartment = (
    df_contains_content
    .merge(df_SRC_Apartment[["APARTMENT_ID", "APARTMENT_NAME"]], on="APARTMENT_ID", how="inner")
    .drop(columns=["CONTENT_ID", "CONTENT_NAME"])
    .rename(columns={"APARTMENT_NAME": "Listing name", "CONTENT_QUANTITY": "# of beds"})
    .loc[:, ["APARTMENT_ID", "Listing name", "# of beds"]]
)

df_apartment.head()

Unnamed: 0,APARTMENT_ID,Listing name,# of beds


In [7]:
# Bare expression: displays the reservation table as loaded from the source.
# NOTE(review): this renders the whole frame; .head() may be enough here.
df_SRC_Reservation 

Unnamed: 0,RESERVIATION_ID,BOOKED,START_DATE,END_DATE,STATUS,PRICE,NUMBER_OF_GUESTS,PAYMENT_METHOD,ADVERTISER,GUEST_ID,APARTMENT_ID


In [8]:
# Parse the three date columns so they support date arithmetic below.
for date_col in ("START_DATE", "END_DATE", "BOOKED"):
    df_SRC_Reservation[date_col] = pd.to_datetime(df_SRC_Reservation[date_col])

# These two columns are not used in the final report.
# NOTE: the drop is in place, so this cell fails with a KeyError if re-run
# without reloading df_SRC_Reservation first.
df_SRC_Reservation.drop(columns = ["RESERVIATION_ID", "PAYMENT_METHOD"], inplace = True)

# Whole days between booking and arrival, and between arrival and departure.
df_SRC_Reservation["# of days pre booked"] = (
    df_SRC_Reservation["START_DATE"].sub(df_SRC_Reservation["BOOKED"]).dt.days
)
df_SRC_Reservation["# of nights"] = (
    df_SRC_Reservation["END_DATE"].sub(df_SRC_Reservation["START_DATE"]).dt.days
)

# Tax is one unit per guest per night; it is then subtracted from the price.
df_SRC_Reservation["Tax"] = df_SRC_Reservation["NUMBER_OF_GUESTS"].mul(df_SRC_Reservation["# of nights"])
df_SRC_Reservation["Earnings after Tax"] = df_SRC_Reservation["PRICE"].sub(df_SRC_Reservation["Tax"])

df_SRC_Reservation.head()

Unnamed: 0,BOOKED,START_DATE,END_DATE,STATUS,PRICE,NUMBER_OF_GUESTS,ADVERTISER,GUEST_ID,APARTMENT_ID,# of days pre booked,# of nights,Tax,Earnings after Tax


In [9]:
# Replace the upper-case source column names with report labels (in place).
# PRICE becomes "Earnings"; the id columns keep their names for the joins.
df_SRC_Reservation.rename(
    columns = {
        "BOOKED" : "Booked",
        "START_DATE" : "Start date",
        "END_DATE" : "End date",
        "STATUS" : "Status",
        "PRICE" : "Earnings",
        "NUMBER_OF_GUESTS" : "# of guests",
        "ADVERTISER" : "Advertiser",
    },
    inplace = True,
)

df_SRC_Reservation.head()


Unnamed: 0,Booked,Start date,End date,Status,Earnings,# of guests,Advertiser,GUEST_ID,APARTMENT_ID,# of days pre booked,# of nights,Tax,Earnings after Tax


In [10]:
# Left joins keep every reservation even when no guest or apartment row matches.
df_res_guest = df_SRC_Reservation.merge(df_guest_country, how="left", on="GUEST_ID")

df_final = df_res_guest.merge(df_apartment, how="left", on="APARTMENT_ID")

df_final.head()

Unnamed: 0,Booked,Start date,End date,Status,Earnings,# of guests,Advertiser,# of days pre booked,# of nights,Tax,Earnings after Tax,GUEST_ID,Origin,Guest name,APARTMENT_ID,Listing name,# of beds


In [11]:
# Select the report columns in their final order; the id columns are left out.
df = df_final[
    [
        "Status",
        "Guest name",
        "Origin",
        "# of guests",
        "Booked",
        "Start date",
        "End date",
        "# of nights",
        "# of days pre booked",
        "Listing name",
        "# of beds",
        "Earnings",
        "Tax",
        "Earnings after Tax",
        "Advertiser",
    ]
]

In [13]:
# Directory the Excel report is written to.
exportPath = "./"

# Build the target with os.path.join rather than string concatenation, so the
# export still lands inside exportPath when it has no trailing separator.
df.to_excel(os.path.join(exportPath, "Final_OLTP.xlsx"))
df.head()

Unnamed: 0,Status,Guest name,Origin,# of guests,Booked,Start date,End date,# of nights,# of days pre booked,Listing name,# of beds,Earnings,Tax,Earnings after Tax,Advertiser
