# Import Library

In [12]:
import pandas as pd
import sqlite3
import requests
import io

conn = sqlite3.connect("./data/chinook.db")

In [2]:
chinook = pd.read_sql_query(
    """
    SELECT (C.FirstName||' '||C.LastName) as FullName , I.BillingCountry as Country, I.InvoiceDate, I.Total
    FROM customers as C
    LEFT JOIN invoices as I
    ON C.CustomerId = I.CustomerId
    """, conn, parse_dates="InvoiceDate")

chinook.pivot_table(index="FullName", values="Total", aggfunc="sum").sort_values("Total", ascending=False).head()

Unnamed: 0_level_0,Total
FullName,Unnamed: 1_level_1
Helena Holý,49.62
Richard Cunningham,47.62
Luis Rojas,46.62
Ladislav Kovács,45.62
Hugh O'Reilly,45.62


## Fetch Song Tracks and Convert to CSV file

- [X] Take data from joining minimum of 4 table

In [63]:
tracks = pd.read_sql_query(
 '''
 SELECT 
 tracks.TrackId, tracks.Name as Song, artists.Name as Artist, albums.Title as Album, tracks.Composer, genres.Name as Genre, tracks.UnitPrice
 FROM tracks
 LEFT JOIN albums ON albums.AlbumId = tracks.AlbumId
 LEFT JOIN artists ON artists.ArtistId = albums.AlbumId
 LEFT JOIN genres ON genres.GenreId = tracks.GenreId
 ''', conn)

tracks.to_csv("./data/tracks.csv", index=False)
tracks.head()

Unnamed: 0,TrackId,Song,Artist,Album,Composer,Genre,UnitPrice
0,1,For Those About To Rock (We Salute You),AC/DC,For Those About To Rock We Salute You,"Angus Young, Malcolm Young, Brian Johnson",Rock,0.99
1,2,Balls to the Wall,Accept,Balls to the Wall,,Rock,0.99
2,3,Fast As a Shark,Aerosmith,Restless and Wild,"F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...",Rock,0.99
3,4,Restless and Wild,Aerosmith,Restless and Wild,"F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...",Rock,0.99
4,5,Princess of the Dawn,Aerosmith,Restless and Wild,Deaffy & R.A. Smith-Diesel,Rock,0.99


## Top 5 Country

- [X] Datetime operation
- [X] Categorical operation
- [X] Frequencies analysis
- [ ] Missing Value and Duplicates operation

In [65]:
customers = pd.read_sql_query(
    """
    SELECT (C.FirstName||' '||C.LastName) as FullName , I.BillingCountry as Country, I.InvoiceDate, I.Total
    FROM customers as C
    LEFT JOIN invoices as I
    ON C.CustomerId = I.CustomerId
    """, conn, parse_dates="InvoiceDate")

# customers.pivot_table(index="FullName", values="Total", aggfunc="sum").sort_values("Total", ascending=False).head()
customers



Unnamed: 0,FullName,Country,InvoiceDate,Total
0,Luís Gonçalves,Brazil,2010-03-11,3.98
1,Luís Gonçalves,Brazil,2010-06-13,3.96
2,Luís Gonçalves,Brazil,2010-09-15,5.94
3,Luís Gonçalves,Brazil,2011-05-06,0.99
4,Luís Gonçalves,Brazil,2012-10-27,1.98
...,...,...,...,...
407,Puja Srivastava,India,2009-07-08,5.94
408,Puja Srivastava,India,2010-02-26,1.99
409,Puja Srivastava,India,2011-08-20,1.98
410,Puja Srivastava,India,2011-09-30,13.86


In [70]:
top5 = customers.groupby('Country').Total.sum().sort_values(ascending=False).head().index.to_list()
top5_data = customers[customers['Country'].isin(top5)].copy()

dayorder = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']


In [71]:
top5_data['InvoiceDOW'] = top5_data['InvoiceDate'].dt.day_name()
top5_data['InvoiceDOW'] = pd.Categorical(top5_data['InvoiceDOW'],
                                         categories=dayorder,
                                         ordered=True)
top5_data

Unnamed: 0,FullName,Country,InvoiceDate,Total,InvoiceDOW
0,Luís Gonçalves,Brazil,2010-03-11,3.98,Thursday
1,Luís Gonçalves,Brazil,2010-06-13,3.96,Sunday
2,Luís Gonçalves,Brazil,2010-09-15,5.94,Wednesday
3,Luís Gonçalves,Brazil,2011-05-06,0.99,Friday
4,Luís Gonçalves,Brazil,2012-10-27,1.98,Saturday
...,...,...,...,...,...
296,Isabelle Mercier,France,2010-07-15,5.94,Thursday
297,Isabelle Mercier,France,2011-03-05,0.99,Saturday
298,Isabelle Mercier,France,2012-08-26,1.98,Sunday
299,Isabelle Mercier,France,2012-10-06,16.86,Saturday


In [16]:
# book_url = 'https://algo-capstone.herokuapp.com/data/get/books_c.csv'
# s = requests.get(book_url)
# s_df = pd.DataFrame(s.json())
# s_df

In [8]:
pd.read_csv('./data/pulsar_stars.csv').head()

Unnamed: 0,Mean of the integrated profile,Standard deviation of the integrated profile,Excess kurtosis of the integrated profile,Skewness of the integrated profile,Mean of the DM-SNR curve,Standard deviation of the DM-SNR curve,Excess kurtosis of the DM-SNR curve,Skewness of the DM-SNR curve,target_class
0,140.5625,55.683782,-0.234571,-0.699648,3.199833,19.110426,7.975532,74.242225,0
1,102.507812,58.88243,0.465318,-0.515088,1.677258,14.860146,10.576487,127.39358,0
2,103.015625,39.341649,0.323328,1.051164,3.121237,21.744669,7.735822,63.171909,0
3,136.75,57.178449,-0.068415,-0.636238,3.642977,20.95928,6.896499,53.593661,0
4,88.726562,40.672225,0.600866,1.123492,1.17893,11.46872,14.269573,252.567306,0


In [15]:
# pd.read_csv('https://algo-capstone.herokuapp.com/data/get/equal/books_c.csv/isbn/0439785960').head()

In [18]:
heroku_url = 'https://algo-capstone.herokuapp.com/data/get/books_c.csv'
r = requests.get(heroku_url)
r_pd = pd.DataFrame(r.json())
r_pd.head()

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,# num_pages,ratings_count,text_reviews_count
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling,4.56,0439785960,9780439785969,eng,652,1944099,26249
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling,4.49,0439358078,9780439358071,eng,870,1996446,27613
2,3,Harry Potter and the Sorcerer's Stone (Harry P...,J.K. Rowling,4.47,0439554934,9780439554930,eng,320,5629932,70390
3,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.41,0439554896,9780439554893,eng,352,6267,272
4,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling,4.55,043965548X,9780439655484,eng,435,2149872,33964
