In [71]:
from sqlalchemy import create_engine
import pandas as pd

In [54]:
engine = create_engine('sqlite:///chinook.db')
engine2 = create_engine('sqlite:///DW_SALES_MUSIC.db')
engine

Engine(sqlite:///chinook.db)

**Dimensión Track**

In [55]:
track_tbl = pd.read_sql_table('tracks', con=engine)
genre_tbl = pd.read_sql_table('genres', con=engine)
mediatype_tbl = pd.read_sql_table('media_types', con=engine)

In [56]:
genre_tbl.rename(columns={'Name': 'Genre'}, inplace=True)
mediatype_tbl.rename(columns={'Name': 'MediaType'}, inplace=True)

In [57]:
track_dim = track_tbl.merge(genre_tbl, on='GenreId')
track_dim = track_dim.merge(mediatype_tbl, on='MediaTypeId')


In [58]:
track_dim.drop(['GenreId', 'AlbumId', 'MediaTypeId'], axis=1, inplace=True)
track_dim.sort_values('TrackId', inplace=True)

In [59]:
track_dim.to_sql(name='dim_track', con=engine2, if_exists='append', index=False)

In [60]:
track_dim

Unnamed: 0,TrackId,Name,Composer,Milliseconds,Bytes,UnitPrice,Genre,MediaType
0,1,For Those About To Rock (We Salute You),"Angus Young, Malcolm Young, Brian Johnson",343719,11170334,0.99,Rock,MPEG audio file
3034,2,Balls to the Wall,,342562,5510424,0.99,Rock,Protected AAC audio file
3035,3,Fast As a Shark,"F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...",230619,3990994,0.99,Rock,Protected AAC audio file
3036,4,Restless and Wild,"F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...",252051,4331779,0.99,Rock,Protected AAC audio file
3037,5,Princess of the Dawn,Deaffy & R.A. Smith-Diesel,375418,6290521,0.99,Rock,Protected AAC audio file
...,...,...,...,...,...,...,...,...
3266,3499,Pini Di Roma (Pinien Von Rom) \ I Pini Della V...,,286741,4718950,0.99,Classical,Protected AAC audio file
3267,3500,"String Quartet No. 12 in C Minor, D. 703 ""Quar...",Franz Schubert,139200,2283131,0.99,Classical,Protected AAC audio file
3268,3501,"L'orfeo, Act 3, Sinfonia (Orchestra)",Claudio Monteverdi,66639,1189062,0.99,Classical,Protected AAC audio file
3269,3502,"Quintet for Horn, Violin, 2 Violas, and Cello ...",Wolfgang Amadeus Mozart,221331,3665114,0.99,Classical,Protected AAC audio file


**Dimensión Artista**

In [61]:
artist_dim = pd.read_sql_table('artists', con=engine)
artist_dim.to_sql(name='dim_artist', con=engine2, if_exists='append', index=False)
artist_dim

Unnamed: 0,ArtistId,Name
0,1,AC/DC
1,2,Accept
2,3,Aerosmith
3,4,Alanis Morissette
4,5,Alice In Chains
...,...,...
270,271,"Mela Tenenbaum, Pro Musica Prague & Richard Kapp"
271,272,Emerson String Quartet
272,273,"C. Monteverdi, Nigel Rogers - Chiaroscuro; Lon..."
273,274,Nash Ensemble


**Dimensión Album**

In [62]:
album_dim = pd.read_sql_table('albums', con=engine)
album_dim.drop(['ArtistId'], axis=1, inplace=True)
album_dim.to_sql(name='dim_album', con=engine2, if_exists='append', index=False)
album_dim

Unnamed: 0,AlbumId,Title
0,1,For Those About To Rock We Salute You
1,2,Balls to the Wall
2,3,Restless and Wild
3,4,Let There Be Rock
4,5,Big Ones
...,...,...
342,343,Respighi:Pines of Rome
343,344,Schubert: The Late String Quartets & String Qu...
344,345,Monteverdi: L'Orfeo
345,346,Mozart: Chamber Music


**Dimensión Customer**

In [63]:
customer_dim = pd.read_sql_table('customers', con=engine)
customer_dim.drop(['SupportRepId'], axis=1, inplace=True)
customer_dim.to_sql(name='dim_customer', con=engine2, if_exists='append', index=False)
customer_dim

Unnamed: 0,CustomerId,FirstName,LastName,Company,Address,City,State,Country,PostalCode,Phone,Fax,Email
0,1,Luís,Gonçalves,Embraer - Empresa Brasileira de Aeronáutica S.A.,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,+55 (12) 3923-5566,luisg@embraer.com.br
1,2,Leonie,Köhler,,Theodor-Heuss-Straße 34,Stuttgart,,Germany,70174,+49 0711 2842222,,leonekohler@surfeu.de
2,3,François,Tremblay,,1498 rue Bélanger,Montréal,QC,Canada,H2G 1A7,+1 (514) 721-4711,,ftremblay@gmail.com
3,4,Bjørn,Hansen,,Ullevålsveien 14,Oslo,,Norway,0171,+47 22 44 22 22,,bjorn.hansen@yahoo.no
4,5,František,Wichterlová,JetBrains s.r.o.,Klanova 9/506,Prague,,Czech Republic,14700,+420 2 4172 5555,+420 2 4172 5555,frantisekw@jetbrains.com
5,6,Helena,Holý,,Rilská 3174/6,Prague,,Czech Republic,14300,+420 2 4177 0449,,hholy@gmail.com
6,7,Astrid,Gruber,,"Rotenturmstraße 4, 1010 Innere Stadt",Vienne,,Austria,1010,+43 01 5134505,,astrid.gruber@apple.at
7,8,Daan,Peeters,,Grétrystraat 63,Brussels,,Belgium,1000,+32 02 219 03 03,,daan_peeters@apple.be
8,9,Kara,Nielsen,,Sønder Boulevard 51,Copenhagen,,Denmark,1720,+453 3331 9991,,kara.nielsen@jubii.dk
9,10,Eduardo,Martins,Woodstock Discos,"Rua Dr. Falcão Filho, 155",São Paulo,SP,Brazil,01007-010,+55 (11) 3033-5446,+55 (11) 3033-4564,eduardo@woodstock.com.br


**Dimensión Time**

In [64]:
def create_time_table(start='2009-01-01', end='2013-12-22'):
    df = pd.DataFrame({'date': pd.date_range(start, end)})
    df['TimeId'] = df.index + 1
    df['year'] = df.date.dt.year
    df['month'] = df.date.dt.month
    df['month_name'] = df.date.dt.month_name()
    df['day'] = df.date.dt.day
    df['day_name'] = df.date.dt.day_name()
    df['day_week'] = df.date.dt.dayofweek
    df['week'] = df.date.dt.weekofyear
    df['quarter'] = df.date.dt.quarter
    
    df = df[['TimeId', 'date', 'year', 'month', 'month_name', 'day', 'day_name', 'day_week', 'week', 'quarter']] 
    
    return df

In [65]:
time_dim = create_time_table()
time_dim.to_sql(name='dim_time', con=engine2, if_exists='append', index=False)
time_dim

  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,TimeId,date,year,month,month_name,day,day_name,day_week,week,quarter
0,1,2009-01-01,2009,1,January,1,Thursday,3,1,1
1,2,2009-01-02,2009,1,January,2,Friday,4,1,1
2,3,2009-01-03,2009,1,January,3,Saturday,5,1,1
3,4,2009-01-04,2009,1,January,4,Sunday,6,1,1
4,5,2009-01-05,2009,1,January,5,Monday,0,2,1
...,...,...,...,...,...,...,...,...,...,...
1812,1813,2013-12-18,2013,12,December,18,Wednesday,2,51,4
1813,1814,2013-12-19,2013,12,December,19,Thursday,3,51,4
1814,1815,2013-12-20,2013,12,December,20,Friday,4,51,4
1815,1816,2013-12-21,2013,12,December,21,Saturday,5,51,4


**Dimensión Location**

In [67]:
location_dim = pd.read_sql_table('invoices', con=engine)
location_dim.rename(columns={'InvoiceId': 'LocationId'}, inplace=True)
location_dim = location_dim.drop(['CustomerId','InvoiceDate', 'Total', 'BillingState', 'BillingPostalCode'], axis=1)
location_dim.to_sql(name='dim_location', con=engine2, if_exists='append', index=False)
location_dim

Unnamed: 0,LocationId,BillingAddress,BillingCity,BillingCountry
0,1,Theodor-Heuss-Straße 34,Stuttgart,Germany
1,2,Ullevålsveien 14,Oslo,Norway
2,3,Grétrystraat 63,Brussels,Belgium
3,4,8210 111 ST NW,Edmonton,Canada
4,5,69 Salem Street,Boston,USA
...,...,...,...,...
407,408,319 N. Frances Street,Madison,USA
408,409,796 Dundas Street West,Toronto,Canada
409,410,"Rua dos Campeões Europeus de Viena, 4350",Porto,Portugal
410,411,Porthaninkatu 9,Helsinki,Finland


#Tabla de hechos

In [74]:
invItems = pd.read_sql_table('invoice_items', con=engine)
invoice = pd.read_sql_table('invoices', con=engine)
tracks = pd.read_sql_table('tracks', con=engine)
albums = pd.read_sql_table('albums', con=engine)

invoiceFact = pd.merge(invItems, invoice[['InvoiceId','InvoiceDate', 'CustomerId', 'Total']], on='InvoiceId', how='left')
invoiceFact = pd.merge(invoiceFact, tracks[['TrackId','AlbumId']], on='TrackId', how='left')
invoiceFact = pd.merge(invoiceFact, albums[['AlbumId','ArtistId']], on='AlbumId', how='left')
invoiceFact = invoiceFact.merge(time_dim[['date', 'TimeId']], left_on='InvoiceDate', right_on='date')
invoice_fact_dim = invoiceFact[['InvoiceLineId', 'InvoiceId', 'TrackId', 'TimeId', 'CustomerId', 'AlbumId', 'ArtistId','Total']]


In [81]:
#invoice_fact_dim.to_sql('invoice_fact', engine, index=False, method='multi')
invoice_fact = pd.read_sql_table('invoice_fact', con=engine)
invoice_fact.rename(columns={'InvoiceId': 'LocationId'}, inplace=True)
invoice_fact.rename(columns={'InvoiceLineId': 'FactSalesId'}, inplace=True)
invoice_fact.to_sql(name='Fact_Sales', con=engine2, if_exists='append', index=False)

In [83]:
Fact_Sales = pd.read_sql_table('Fact_Sales', con=engine2)
Fact_Sales

Unnamed: 0,FactSalesId,AlbumId,ArtistId,TrackId,CustomerId,TimeId,LocationId,Total
0,1,2,2,2,2,1,1,1.98
1,2,3,2,4,2,1,1,1.98
2,3,1,1,6,4,2,2,3.96
3,4,1,1,8,4,2,2,3.96
4,5,1,1,10,4,2,2,3.96
...,...,...,...,...,...,...,...,...
2235,2236,141,100,3136,44,1809,411,13.86
2236,2237,141,100,3145,44,1809,411,13.86
2237,2238,248,155,3154,44,1809,411,13.86
2238,2239,248,155,3163,44,1809,411,13.86
