In [1]:
import os

import cx_Oracle
import pandas as pd

In [2]:
def extract_oltp(table_names):
    """Load tables of the "SSP_RES" schema into notebook-global DataFrames.

    For every entry of ``table_names`` a ``select *`` is executed and the
    result is stored as a pandas DataFrame in ``globals()`` under the name
    ``df_<table_name>``; a confirmation line is printed per table.

    Parameters
    ----------
    table_names : iterable of str
        Table names. Each one is interpolated verbatim into both the SQL text
        and the global variable name.
        NOTE(review): the names are not validated or escaped here, so only
        trusted, hard-coded identifiers should be passed.

    Returns
    -------
    None
        Results are exposed only through ``globals()``. Errors raised while
        connecting or querying are printed, not re-raised; tables loaded
        before the failure stay available.
    """
    # The connection string can be overridden through the OLTP_CONN_STR
    # environment variable; the previous hard-coded value stays the default.
    conStr = os.environ.get('OLTP_CONN_STR', 'system/oracle@localhost:1521/xe')

    # Both handles start as None so the cleanup below can tell what was
    # actually acquired, even if connect() or cursor() fails.
    conn = None
    cur = None

    try:
        conn = cx_Oracle.connect(conStr)
        cur = conn.cursor()

        for table_name in table_names:
            # The table name is left unquoted, exactly as passed in.
            sqlTxt = f'select * from "SSP_RES".{table_name}'

            cur.execute(sqlTxt)
            record = cur.fetchall()

            # The first element of each cursor.description entry is the column name.
            df = pd.DataFrame.from_records(record, columns=[x[0] for x in cur.description])

            variable = f'df_{table_name}'
            globals()[variable] = df

            print(f"{variable} succesfully loaded!")

    except Exception as err:
        print('Error while connecting to the db')
        print(err)

    finally:
        # Close the cursor and the connection independently: a missing or
        # failing cursor must never prevent the connection from being closed.
        try:
            if cur is not None:
                cur.close()
        finally:
            if conn is not None:
                conn.close()

In [3]:
# Source tables to pull from the OLTP schema, one per line.
table_names = [
    "SRC_Country",
    "SRC_City",
    "SRC_Guest",
    "SRC_Building",
    "SRC_Apartment_Type",
    "SRC_Apartment",
    "SRC_Reservation",
    "SRC_Content",
    "SRC_Contains",
    "SRC_Apartment_Ocupancy",
    "SRC_Pricelist",
]

extract_oltp(table_names)

df_SRC_Country succesfully loaded!
df_SRC_City succesfully loaded!
df_SRC_Guest succesfully loaded!
df_SRC_Building succesfully loaded!
df_SRC_Apartment_Type succesfully loaded!
df_SRC_Apartment succesfully loaded!
df_SRC_Reservation succesfully loaded!
df_SRC_Content succesfully loaded!
df_SRC_Contains succesfully loaded!
df_SRC_Apartment_Ocupancy succesfully loaded!
df_SRC_Pricelist succesfully loaded!


In [4]:
# Work on copies so the raw extracts stay untouched.
df_guest = df_SRC_Guest.copy()
df_country = df_SRC_Country.copy()

# Left-join the country name onto each guest, build one full-name column
# and drop the columns that were only needed for the join / concatenation.
df_guest_country = (
    df_guest[["GUEST_ID", "GUEST_FIRSTNAME", "GUEST_LASTNAME", "COUNTRY_ID"]]
    .merge(df_country[["COUNTRY_ID", "COUNTRY_NAME"]], how="left", on="COUNTRY_ID")
    .assign(GUEST_NAME=lambda g: g["GUEST_FIRSTNAME"] + " " + g["GUEST_LASTNAME"])
    .drop(columns=["COUNTRY_ID", "GUEST_FIRSTNAME", "GUEST_LASTNAME"])
)
df_guest_country.head()

Unnamed: 0,GUEST_ID,COUNTRY_NAME,GUEST_NAME


In [5]:
# Work on copies so the raw extracts stay untouched.
df_content = df_SRC_Content.copy()
df_contains = df_SRC_Contains.copy()
df_apartment = df_SRC_Apartment.copy()

# Keep only the content rows named "beds".
df_content_beds = df_content[df_content["CONTENT_NAME"] == "beds"]

# Inner joins: contains rows restricted to that content, then the apartment name.
df_contains_content = df_contains.merge(df_content_beds, how="inner", on="CONTENT_ID")

df_apartment_contains = (
    df_contains_content
    .merge(df_apartment[["APARTMENT_ID", "APARTMENT_NAME"]], how="inner", on="APARTMENT_ID")
    .drop(columns=["CONTENT_ID", "CONTENT_NAME"])
)
df_apartment_contains.head()

Unnamed: 0,CONTENT_QUANTITY,APARTMENT_ID,APARTMENT_NAME


In [6]:
# Derive the reservation features in one chain; the raw extract is not modified.
df_reservation = (
    df_SRC_Reservation
    # Parse the three date columns.
    .assign(
        START_DATE=lambda r: pd.to_datetime(r["START_DATE"]),
        END_DATE=lambda r: pd.to_datetime(r["END_DATE"]),
        BOOKED=lambda r: pd.to_datetime(r["BOOKED"]),
    )
    .drop(columns=["RESERVIATION_ID", "PAYMENT_METHOD"])
    # Later entries may reference columns created by earlier ones.
    .assign(**{
        "# of days pre booked": lambda r: (r["START_DATE"] - r["BOOKED"]).dt.days,
        "# of nights": lambda r: (r["END_DATE"] - r["START_DATE"]).dt.days,
        # Tax is computed as guests multiplied by nights.
        "Tax": lambda r: r["NUMBER_OF_GUESTS"] * r["# of nights"],
        "Earnings after Tax": lambda r: r["PRICE"] - r["Tax"],
        "ADVERTISER": "OLTP",
    })
)
df_reservation.head()

Unnamed: 0,BOOKED,START_DATE,END_DATE,STATUS,PRICE,NUMBER_OF_GUESTS,ADVERTISER,GUEST_ID,APARTMENT_ID,# of days pre booked,# of nights,Tax,Earnings after Tax


In [7]:
# Enrich every reservation with guest details, then with apartment details.
# Left joins keep reservations that have no matching guest / apartment row.
df_reservation_guest = df_reservation.merge(df_guest_country, how="left", on="GUEST_ID")

df_merged = df_reservation_guest.merge(df_apartment_contains, how="left", on="APARTMENT_ID")
df_merged.head()

Unnamed: 0,BOOKED,START_DATE,END_DATE,STATUS,PRICE,NUMBER_OF_GUESTS,ADVERTISER,# of days pre booked,# of nights,Tax,Earnings after Tax,GUEST_ID,COUNTRY_NAME,GUEST_NAME,CONTENT_QUANTITY,APARTMENT_ID,APARTMENT_NAME


In [8]:
# Switch from source column names to the report's display names.
df_merged = df_merged.rename(
    columns={
        "COUNTRY_NAME": "Origin",
        "GUEST_NAME": "Guest name",
        "STATUS": "Status",
        "NUMBER_OF_GUESTS": "# of guests",
        "BOOKED": "Booked",
        "START_DATE": "Start date",
        "END_DATE": "End date",
        "ADVERTISER": "Advertiser",
        "PRICE": "Earnings",
        "APARTMENT_NAME": "Listing name",
        "CONTENT_QUANTITY": "# of beds",
    }
)

# Final column selection, in export order.
df = df_merged[
    [
        "Status",
        "Guest name",
        "Origin",
        "# of guests",
        "Booked",
        "Start date",
        "End date",
        "# of nights",
        "# of days pre booked",
        "Listing name",
        "# of beds",
        "Earnings",
        "Tax",
        "Earnings after Tax",
        "Advertiser",
    ]
]

Exporting the data frame to an Excel file

In [9]:
exportPath = "./preprocessed files/"

# Create the target folder when it is missing (e.g. on a fresh checkout);
# to_excel does not create missing directories and would fail otherwise.
os.makedirs(exportPath, exist_ok=True)

# Write the final frame to Excel and preview it.
df.to_excel(exportPath + "Final_Oltp.xlsx")
df.head()

Unnamed: 0,Status,Guest name,Origin,# of guests,Booked,Start date,End date,# of nights,# of days pre booked,Listing name,# of beds,Earnings,Tax,Earnings after Tax,Advertiser
