## Importing Libraries

In [23]:
import datetime as dt
import getpass
import json
import os
from urllib.parse import quote_plus

import mysql.connector
import numpy as np
import pandas as pd
import requests
import sqlalchemy
from pandas.io.json import json_normalize

## Reading in Q1 orders data from the Namaste Technologies GitHub repository

In [18]:
# raw GitHub url for orders.json
orders_data_url = "https://raw.githubusercontent.com/namasteTechnologies/data-analyst-challenge/master/orders.json"

# timeout: a stalled connection must not hang the notebook forever
# raise_for_status: an HTTP error (404, rate limit, ...) should fail here with a clear
# message instead of surfacing later as a confusing JSON decode error
orders_data_resp = requests.get(orders_data_url, timeout=30)
orders_data_resp.raise_for_status()

# the payload is a JSON list with one dictionary per order
orders_data = orders_data_resp.json()
print(orders_data[0])

{'id': 8369263756632563, 'customer': {'id': 47178, 'name': 'William Doe', 'email': 'william.doe@gmail.com'}, 'total_price': 79.39, 'created_at': '2020-03-07T14:31:11Z', 'line_items': [{'id': 610448, 'product_id': 632910392, 'product_sku': 'PPLEPUNCH20-05', 'product_name': 'Purple Punch 2.0 Dried Flower', 'price': 29.4}, {'id': 997208, 'product_id': 278266679, 'product_sku': 'GPSTASH-01', 'product_name': "Grandpa's Stash Dried Flower", 'price': 49.99}]}


In [140]:
# converting the raw data to a pandas dataframe (one row per order at this point)
orders_df = pd.DataFrame(orders_data)

# created_at holds ISO-8601 strings with a trailing "Z" (UTC), e.g. '2020-03-07T14:31:11Z'.
# They are parsed explicitly as UTC and the timezone is then dropped, so the column is a
# naive datetime64 column in UTC wall time regardless of how the installed pandas version
# treats timezone suffixes in a plain astype() cast.
orders_df['created_at'] = pd.to_datetime(orders_df['created_at'], utc=True).dt.tz_localize(None)

# order ids are identifiers, not quantities, so they are stored as strings
orders_df['id'] = orders_df['id'].astype('str')

# exploding the list of dictionaries in the line_items column to create a separate row for each
# element in the list; drop=True discards the old (now duplicated) index instead of
# materialising it as an 'index' column that has to be dropped again
orders_df = orders_df.explode("line_items").reset_index(drop=True)

# function to convert the dictionaries in a column to separate columns
def dict_to_col(df, dict_column="a"):
    """Expand a column of dictionaries into one column per dictionary key.

    The keys of the dictionary found in the first row define the new columns. Each new
    column is named ``<dict_column>_<key>`` and is appended after the existing columns;
    the original dictionary column is removed from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``dict_column``. It is not modified.
    dict_column : str
        Name of the column whose cells are dictionaries. The default ``"a"`` is only a
        placeholder; callers are expected to pass the real column name.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as ``df``. The expanded columns have object
        dtype, so callers cast them to a concrete dtype where needed.

    Raises
    ------
    KeyError
        If ``dict_column`` is not a column of ``df``, or if a row's dictionary lacks one
        of the keys present in the first row.
    """
    # nothing to expand: just remove the (empty) dictionary column
    if df.empty:
        return df.drop(columns=[dict_column])

    records = df[dict_column].tolist()

    # keys are taken from the first row *by position*, so the frame does not need to
    # carry an index label equal to 0
    cols = list(records[0].keys())

    # build every new column in a single pass instead of assigning cell by cell;
    # dtype=object keeps the values exactly as they were stored in the dictionaries
    expanded = pd.DataFrame(
        {dict_column + "_" + key: [record[key] for record in records] for key in cols},
        index=df.index,
        dtype=object,
    )

    # existing columns (minus the dictionary column) first, expanded columns after
    return pd.concat([df.drop(columns=[dict_column]), expanded], axis=1)

# flattening the nested columns: customer first, then line_items
orders_df = dict_to_col(
    dict_to_col(orders_df, dict_column="customer"),
    dict_column="line_items",
)

# casting line_items_price to float and giving the order-level columns explicit names
orders_df = (
    orders_df
    .astype({'line_items_price': 'float64'})
    .rename(columns={
        'id': 'order_id',
        'created_at': 'order_created_date',
        'total_price': 'total_order_price_USD',
    })
)

# quick sanity check of the flattened result: size, column types and a preview
print(orders_df.shape)
print(orders_df.dtypes)
orders_df.head(5)

(37, 11)
order_id                           object
total_order_price_USD             float64
order_created_date         datetime64[ns]
customer_id                        object
customer_name                      object
customer_email                     object
line_items_id                      object
line_items_product_id              object
line_items_product_sku             object
line_items_product_name            object
line_items_price                  float64
dtype: object


Unnamed: 0,order_id,total_order_price_USD,order_created_date,customer_id,customer_name,customer_email,line_items_id,line_items_product_id,line_items_product_sku,line_items_product_name,line_items_price
0,8369263756632563,79.39,2020-03-07 14:31:11,47178,William Doe,william.doe@gmail.com,610448,632910392,PPLEPUNCH20-05,Purple Punch 2.0 Dried Flower,29.4
1,8369263756632563,79.39,2020-03-07 14:31:11,47178,William Doe,william.doe@gmail.com,997208,278266679,GPSTASH-01,Grandpa's Stash Dried Flower,49.99
2,7262170348080494,86.98,2020-03-01 09:16:30,94720,Emile Tumson,emile.tumson@gmail.com,997208,278266679,GPSTASH-01,Grandpa's Stash Dried Flower,49.99
3,7262170348080494,86.98,2020-03-01 09:16:30,94720,Emile Tumson,emile.tumson@gmail.com,759658,573794972,SAGENSOUR-01,Sage N Sour Dried Flower,36.99
4,7722055557038194,36.99,2020-01-01 09:17:03,59933,Ethan Jones,ethan.jones@gmail.com,759658,573794972,SAGENSOUR-01,Sage N Sour Dried Flower,36.99


## Reading in exchange rates from https://exchangeratesapi.io/

In [227]:
# daily calendar from 2019-12-27 to 2020-03-31; the API has no entry for some days
# (the preview shows 12-28 and 12-29 repeating the 12-27 rate), so every calendar day is
# listed here and the gaps are forward-filled after the merge below
forex_df = pd.DataFrame({"date": pd.date_range(start='2019-12-27', end='2020-03-31', freq='D')})

start_date = forex_df['date'].min().strftime("%Y-%m-%d")
end_date = forex_df['date'].max().strftime("%Y-%m-%d")

rates_data_url = "https://api.exchangeratesapi.io/history"
selection_data = {"start_at": start_date, "end_at": end_date, "base": "USD", "symbols": "USD,CAD"}

# timeout + raise_for_status: fail fast with a clear HTTP error instead of hanging or
# failing later on a missing 'rates' key
rates_data_resp = requests.get(rates_data_url, params=selection_data, timeout=30)
rates_data_resp.raise_for_status()

# 'rates' maps each date string to that day's rates: {"<date>": {"CAD": <rate>, ...}, ...}
daily_rates = rates_data_resp.json()['rates']
rates_df = pd.DataFrame({
    'date': pd.to_datetime(list(daily_rates)),
    'forex': [day_rates['CAD'] for day_rates in daily_rates.values()],
})

# left join keeps every calendar day; days without a published rate carry the most
# recent earlier rate forward
forex_df = pd.merge(forex_df, rates_df, on='date', how='left').ffill()
forex_df.head()

Unnamed: 0,date,forex
0,2019-12-27,1.308348
1,2019-12-28,1.308348
2,2019-12-29,1.308348
3,2019-12-30,1.30673
4,2019-12-31,1.299448


In [None]:
# The password is deliberately not stored in the notebook: it is read from the
# MYSQL_PASSWORD environment variable, falling back to an interactive prompt.
mydb = mysql.connector.connect(
  host="localhost",
  user="root",
  passwd=os.environ.get("MYSQL_PASSWORD") or getpass.getpass("MySQL password for root: ")
)

print(mydb)

In [None]:
import sqlalchemy

# SQLAlchemy engine for the local MySQL server, using the mysql-connector driver.
# 'name_of_your_database' is a placeholder to be replaced with the real schema name.
# echo=True asks SQLAlchemy to log the SQL it emits.
engine = sqlalchemy.create_engine(
    'mysql+mysqlconnector://root@127.0.0.1:3306/name_of_your_database',
    echo=True,
)

In [None]:
# Load the IPython extension named `sql`. The %sql / %%sql magics used in the following
# cells are presumably provided by it (e.g. the ipython-sql package) - confirm it is installed.
%load_ext sql

In [None]:
%sql mysql+mysqlconnector://root:hello123@127.0.0.1:3306/name_of_your_database

In [None]:
%%sql

CREATE TABLE car (
    make  VARCHAR(20),
    model VARCHAR(20),
    owner VARCHAR(20),
    year  DATE
);

In [None]:
%%sql

DESCRIBE car;