# 0. Import libraries and packages

In [None]:
import pandas as pd

# 1. Datasets - Load and Exploration

## 1.1 Fact table - Casinodaily 

In [None]:
# Load the casino daily fact table; the path is relative to the notebook's working directory.
casinodaily = pd.read_csv('../datasets/casinodaily.csv')

In [None]:
# Print the column names, dtypes, non-null counts and memory usage of the fact table.
casinodaily.info()

In [None]:
# List the distinct provider ids that occur in the fact table.
casinodaily['CasinoProviderId'].unique()

In [None]:
# Preview the first 20 rows of the fact table.
casinodaily.head(20)

In [None]:
# List the distinct currency ids that occur in the fact table.
casinodaily['CurrencyId'].unique()

In [None]:
# Show the rows recorded with CurrencyId 10.
casinodaily.loc[casinodaily['CurrencyId'] == 10]

In [None]:
# Distinct country ids among the rows recorded with CurrencyId 10.
casinodaily.loc[casinodaily['CurrencyId'] == 10, 'CountryId'].unique()

## 1.2 Dimension table - Users

In [None]:
# Load the users dimension table; the path is relative to the notebook's working directory.
users = pd.read_csv('../datasets/users.csv')

In [None]:
# Print the column names, dtypes, non-null counts and memory usage of the users table.
users.info()

In [None]:
# Preview the first rows of the users table (head() defaults to 5 rows).
users.head()

In [None]:
# List the distinct values of the Country column.
users['Country'].unique()

### Convert BirthDate datatype to date

In [None]:
# Parse BirthDate into datetime values; entries that cannot be parsed become NaT
# instead of raising (errors='coerce').
users['BirthDate'] = pd.to_datetime(
    users['BirthDate'],
    errors='coerce',
)

In [None]:
# Re-check the dtypes to confirm that BirthDate is now a datetime column.
users.info()

## 1.3 Dimension table - Casino Manufacturers

In [None]:
# Read each line of the file as one raw string: the separator ', "' presumably
# never occurs in the data (NOTE(review): confirm), so everything lands in a
# single column whose name is the whole header line. The fields are split apart
# in the cells below. A multi-character separator needs engine='python'.
manufacturers = pd.read_csv('../datasets/casinomanufacturers.csv', sep = ', "', engine = 'python')

In [None]:
# Print the column names, dtypes, non-null counts and memory usage of the raw manufacturers frame.
manufacturers.info()

In [None]:
# Preview the first 30 rows of the raw manufacturers frame.
manufacturers.head(30)

### Split/spread data into multiple columns

In [None]:
# Preview only (nothing is assigned): cut the single raw column on commas
# (n=4 allows at most four cuts, i.e. five fields) and label the resulting columns.
(
    manufacturers['"CasinoManufacturerId,""CasinoManufacturerName"",""FromDate"",""ToDate"",""LatestFlag"""']
    .str.split(',', n=4, expand=True)
    .set_axis(
        ['CasinoManufacturerId', 'CasinoManufacturerName', 'FromDate', 'ToDate', 'LatestFlag'],
        axis=1,
    )
)

In [None]:
# Replace the single-column frame with a five-column one: cut the raw column on
# commas (n=4 allows at most four cuts, i.e. five fields) and label the columns.
manufacturers = (
    manufacturers['"CasinoManufacturerId,""CasinoManufacturerName"",""FromDate"",""ToDate"",""LatestFlag"""']
    .str.split(',', n=4, expand=True)
    .set_axis(
        ['CasinoManufacturerId', 'CasinoManufacturerName', 'FromDate', 'ToDate', 'LatestFlag'],
        axis=1,
    )
)

### Remove all " from column values

In [None]:
# The split above only cut on commas, so every field still carries the literal
# double quotes of the raw CSV line. Strip them from all five columns, including
# FromDate and ToDate, so that no column keeps stray quote characters.
# regex=False makes the replacement a plain literal substitution.
for column in ('CasinoManufacturerId', 'CasinoManufacturerName', 'FromDate', 'ToDate', 'LatestFlag'):
    manufacturers[column] = manufacturers[column].str.replace('"', '', regex=False)

### Convert CasinoManufacturerId and LatestFlag columns to integer

In [None]:
# Convert the id and flag columns from strings to numbers. Both conversions are
# evaluated before either column is overwritten, so a failure leaves the frame untouched.
manufacturers['CasinoManufacturerId'], manufacturers['LatestFlag'] = (
    pd.to_numeric(manufacturers['CasinoManufacturerId']),
    pd.to_numeric(manufacturers['LatestFlag']),
)

In [None]:
# Preview the rows flagged as latest (LatestFlag equal to 1).
manufacturers.loc[manufacturers['LatestFlag'] == 1]

In [None]:
# Re-check the dtypes after the numeric conversion.
manufacturers.info()

### Keep only the latest manufacturer names

In [None]:
# Drop every row that is not flagged as latest (LatestFlag equal to 1).
manufacturers = manufacturers.loc[manufacturers['LatestFlag'].eq(1)]

## 1.4 Dimension table - Casino Providers

In [None]:
# Load the casino providers dimension table; the path is relative to the notebook's working directory.
providers = pd.read_csv('../datasets/casinoproviders.csv')

In [None]:
# Print the column names, dtypes, non-null counts and memory usage of the providers table.
providers.info()

In [None]:
# Display the providers table.
providers

## 1.5 Dimension table - Currency Rates

In [None]:
# Load the currency rates dimension table; the path is relative to the notebook's working directory.
currencyrates = pd.read_csv('../datasets/currencyrates.csv')

In [None]:
# Print the column names, dtypes, non-null counts and memory usage of the currency rates table.
currencyrates.info()

In [None]:
# Display the currency rates table.
currencyrates

In [None]:
# List the distinct target currency ids present in the rates table.
currencyrates['ToCurrencyId'].unique()

# 3. Final aggregated table

In [None]:
# Keep only the fact rows whose CurrencyId is 15.
casinodaily = casinodaily.loc[casinodaily['CurrencyId'].eq(15)]

I have excluded the rows with CurrencyId = 10 because the Currency Rates table only contains a rate for CurrencyId = 15.

In [None]:
# Enrich the fact rows with user, manufacturer and provider attributes.
# All three joins are left joins, so every casinodaily row is kept even when a
# dimension table has no matching key.
final_table = (
    casinodaily
    .merge(users, how='left', left_on='UserID', right_on='user_id')
    .merge(manufacturers, how='left', on='CasinoManufacturerId')
    .merge(providers, how='left', on='CasinoProviderId')
)

### Add an age column, used later to create the AgeGroup column  
Any particular date can be used instead of now for the following calculations.

In [None]:
# Reference timestamp for the age calculation. Any particular date can be used
# instead, e.g. pd.Timestamp('2023-01-01').
# pd.Timestamp.now() always yields the current local time as a timezone-naive
# Timestamp, whereas pd.to_datetime('now') returned UTC before pandas 2.0 and
# local time from 2.0 onwards.
now = pd.Timestamp.now()

In [None]:
# Display the reference timestamp.
now

In [None]:
# List the columns available after the merges.
final_table.columns

In [None]:
# Narrow the merged table to the columns of interest. The explicit .copy() makes
# final_table an independent frame, so columns added to it afterwards do not
# trigger pandas' SettingWithCopyWarning.
final_table = final_table[
    [
        'Date', 'Country', 'Sex', 'VIPStatus', 'BirthDate',
        'GGR', 'Returns',
        'CasinoManufacturerId', 'CasinoManufacturerName',
        'CasinoProviderId', 'CasinoProviderName',
    ]
].copy()
# Notes on where the report columns come from:
# - AgeGroup: combine casinodaily with users on BirthDate
# - CasinoProviderName: combine casinodaily with providers on CasinoProviderId
# - GGR_EUR: casinodaily plus currency rates
# - Returns_EUR: casinodaily plus currency rates

In [None]:
# Preview the first 10 rows of the final table.
final_table.head(10)