<a href="https://colab.research.google.com/github/lunchbox601/Portfolio/blob/main/Python_%2B_SQL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
# Import Step
# Template + Guide to use SQL in Google Colab/Jupyter Notebook
import pandas as pd
import sqlite3

In [40]:
# Setup Step
def pd_to_sqlDB(input_df: pd.DataFrame,
                table_name: str,
                db_name: str = 'default.db') -> None:

    '''Take a Pandas dataframe `input_df` and upload it to `table_name` SQLITE table

    A new table is created with one untyped column per dataframe column and
    every row of `input_df` is inserted into it. Column labels are converted
    with `str` and written as double-quoted SQL identifiers, so labels that
    contain spaces, punctuation or SQL keywords are accepted. `table_name`
    is inserted into the SQL text as given.

    Args:
        input_df (pd.DataFrame): Dataframe containing data to upload to SQLITE
        table_name (str): Name of the SQLITE table to upload to
        db_name (str, optional): Name of the SQLITE Database in which the table is created. 
                                 Defaults to 'default.db'.

    Raises:
        sqlite3.OperationalError: If `table_name` already exists in `db_name`
            (a plain CREATE TABLE is issued) or the generated SQL is invalid.
    '''

    # Step 1: Setup local logging
    import logging
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')

    # Step 2: Find columns in the dataframe
    # Labels are stringified (they may be ints) and quoted as identifiers;
    # an embedded double quote is escaped by doubling it.
    cols = [str(col) for col in input_df.columns]
    cols_string = ','.join('"' + col.replace('"', '""') + '"' for col in cols)
    val_wildcard_string = ','.join(['?'] * len(cols))

    # Step 3: Connect to a DB file if it exists, else create a new file
    con = sqlite3.connect(db_name)
    try:
        cur = con.cursor()
        logging.info(f'SQL DB {db_name} created')

        # Step 4: Create Table
        sql_string = f"""CREATE TABLE {table_name} ({cols_string});"""
        cur.execute(sql_string)
        logging.info(f'SQL Table {table_name} created with {len(cols)} columns')

        # Step 5: Upload the dataframe
        rows_to_upload = input_df.to_dict(orient='split')['data']
        sql_string = f"""INSERT INTO {table_name} ({cols_string}) VALUES ({val_wildcard_string});"""
        cur.executemany(sql_string, rows_to_upload)
        logging.info(f'{len(rows_to_upload)} rows uploaded to {table_name}')

        # Step 6: Commit the changes
        con.commit()
    finally:
        # Always release the connection, even when a statement above raised
        # (e.g. the table already exists), so the DB file is not left open.
        con.close()


def sql_query_to_pd(sql_query_string: str, db_name: str ='default.db') -> pd.DataFrame:
    '''Execute an SQL query and return the results as a pandas dataframe

    The connection is always closed before returning, including when the
    query raises. This function never commits, so it is meant for read-only
    queries.

    Args:
        sql_query_string (str): SQL query string to execute
        db_name (str, optional): Name of the SQLITE Database to execute the query in.
                                 Defaults to 'default.db'.

    Returns:
        pd.DataFrame: Results of the SQL query in a pandas dataframe. A
            statement that produces no result set yields an empty dataframe
            with no columns.

    Raises:
        sqlite3.Error: If the query cannot be executed (e.g. invalid SQL or
            a missing table).
    '''
    # Step 1: Connect to the SQL DB
    con = sqlite3.connect(db_name)
    try:
        # Step 2: Execute the SQL query
        cursor = con.execute(sql_query_string)

        # Step 3: Fetch the data and column names
        result_data = cursor.fetchall()
        # cursor.description is None when the statement returns no rows at
        # all (e.g. DDL); treat that as "no columns" instead of crashing.
        cols = [description[0] for description in (cursor.description or ())]
    finally:
        # Step 4: Close the connection, even if the query failed
        con.close()

    # Step 5: Return as a dataframe
    return pd.DataFrame(result_data, columns=cols)

In [41]:
# File Upload
input_df = pd.read_csv('/content/international_debt.csv')

# Load the dataframe into SQLite only when the table is not there yet.
# pd_to_sqlDB issues a plain CREATE TABLE, so calling it again on an existing
# table raises the "table already exists" error; checking sqlite_master first
# makes this cell safe to run both the first time and on every re-run.
existing_tables = sql_query_to_pd(
    "SELECT name FROM sqlite_master "
    "WHERE type = 'table' AND name = 'international_debt'",
    db_name='default.db')
if existing_tables.empty:
    pd_to_sqlDB(input_df,
                table_name='international_debt',
                db_name='default.db')

# Show first ten rows of dataset
sql_query_string = """
SELECT *
FROM international_debt
LIMIT 10
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,debt
0,Afghanistan,AFG,"Disbursements on external debt, long-term (DIS...",DT.DIS.DLXF.CD,72894453.7
1,Afghanistan,AFG,"Interest payments on external debt, long-term ...",DT.INT.DLXF.CD,53239440.1
2,Afghanistan,AFG,"PPG, bilateral (AMT, current US$)",DT.AMT.BLAT.CD,61739336.9
3,Afghanistan,AFG,"PPG, bilateral (DIS, current US$)",DT.DIS.BLAT.CD,49114729.4
4,Afghanistan,AFG,"PPG, bilateral (INT, current US$)",DT.INT.BLAT.CD,39903620.1
5,Afghanistan,AFG,"PPG, multilateral (AMT, current US$)",DT.AMT.MLAT.CD,39107845.0
6,Afghanistan,AFG,"PPG, multilateral (DIS, current US$)",DT.DIS.MLAT.CD,23779724.3
7,Afghanistan,AFG,"PPG, multilateral (INT, current US$)",DT.INT.MLAT.CD,13335820.0
8,Afghanistan,AFG,"PPG, official creditors (AMT, current US$)",DT.AMT.OFFT.CD,100847181.9
9,Afghanistan,AFG,"PPG, official creditors (DIS, current US$)",DT.DIS.OFFT.CD,72894453.7


In [42]:
# Determine number of unique/distinct countries within dataset:
# counts the distinct country_name values in international_debt and returns
# them as a single-row column named total_distinct_countries.
sql_query_string = """
SELECT 
COUNT(DISTINCT country_name) AS total_distinct_countries
FROM international_debt
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,total_distinct_countries
0,124


In [43]:
# Determine distinct debt indicators, aka sources of debt:
# lists every distinct indicator_code once, sorted ascending by the code.
sql_query_string = """
SELECT
DISTINCT indicator_code AS distinct_debt_indicators
FROM international_debt
ORDER BY distinct_debt_indicators
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,distinct_debt_indicators
0,DT.AMT.BLAT.CD
1,DT.AMT.DLXF.CD
2,DT.AMT.DPNG.CD
3,DT.AMT.MLAT.CD
4,DT.AMT.OFFT.CD
5,DT.AMT.PBND.CD
6,DT.AMT.PCBK.CD
7,DT.AMT.PROP.CD
8,DT.AMT.PRVT.CD
9,DT.DIS.BLAT.CD


In [44]:
# How much debt does the entire world owe?
# SUM(debt) over all rows is divided by 1,000,000 and rounded to 2 decimals,
# so total_debt is expressed in millions of whatever unit the debt column uses
# (presumably US$, going by the indicator names -- confirm against the dataset).
sql_query_string = """
SELECT
ROUND(SUM(debt)/1000000, 2) AS total_debt
FROM international_debt
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,total_debt
0,3079734.49


In [45]:
# Which country has the highest debt?
# Sums debt per country_name, sorts the totals in descending order and keeps
# only the first row, i.e. the country_name with the largest total.
sql_query_string = """
SELECT
country_name,
SUM(debt) AS total_debt
FROM international_debt
GROUP BY country_name
ORDER BY total_debt DESC
LIMIT 1
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,country_name,total_debt
0,China,285793500000.0


In [46]:
# How is the debt distributed?
# Computes the average debt for each (indicator_code, indicator_name) pair and
# shows the 10 indicators with the highest average, largest first.
sql_query_string = """
SELECT
indicator_code AS debt_indicator,
indicator_name,
AVG(debt) as average_debt
FROM international_debt
GROUP BY debt_indicator, indicator_name
ORDER BY average_debt DESC
LIMIT 10
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,debt_indicator,indicator_name,average_debt
0,DT.AMT.DLXF.CD,"Principal repayments on external debt, long-te...",5904868000.0
1,DT.AMT.DPNG.CD,"Principal repayments on external debt, private...",5161194000.0
2,DT.DIS.DLXF.CD,"Disbursements on external debt, long-term (DIS...",2152041000.0
3,DT.DIS.OFFT.CD,"PPG, official creditors (DIS, current US$)",1958983000.0
4,DT.AMT.PRVT.CD,"PPG, private creditors (AMT, current US$)",1803694000.0
5,DT.INT.DLXF.CD,"Interest payments on external debt, long-term ...",1644024000.0
6,DT.DIS.BLAT.CD,"PPG, bilateral (DIS, current US$)",1223139000.0
7,DT.INT.DPNG.CD,"Interest payments on external debt, private no...",1220411000.0
8,DT.AMT.OFFT.CD,"PPG, official creditors (AMT, current US$)",1191188000.0
9,DT.AMT.PBND.CD,"PPG, bonds (AMT, current US$)",1082624000.0


In [47]:
# Who has the highest amount of principal repayment?
# The subquery finds the largest debt value recorded for indicator
# DT.AMT.DLXF.CD. The outer query applies the same indicator filter, so a row
# of a different indicator that happens to carry the same debt value cannot
# be returned by mistake.
sql_query_string = """
SELECT
country_name,
indicator_name
FROM international_debt
WHERE indicator_code = 'DT.AMT.DLXF.CD'
AND debt = (SELECT
MAX(debt)
FROM international_debt
WHERE indicator_code = 'DT.AMT.DLXF.CD')
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,country_name,indicator_name
0,China,"Principal repayments on external debt, long-te..."


In [48]:
# What is the most common type of debt?
# Counts the rows for each indicator_code and lists up to 20 codes, ordered by
# that count (highest first); ties are broken by indicator_code, descending.
sql_query_string = """
SELECT
indicator_code,
COUNT(indicator_code) AS indicator_count
FROM international_debt
GROUP BY indicator_code
ORDER BY indicator_count DESC, indicator_code DESC
LIMIT 20
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,indicator_code,indicator_count
0,DT.INT.OFFT.CD,124
1,DT.INT.MLAT.CD,124
2,DT.INT.DLXF.CD,124
3,DT.AMT.OFFT.CD,124
4,DT.AMT.MLAT.CD,124
5,DT.AMT.DLXF.CD,124
6,DT.DIS.DLXF.CD,123
7,DT.INT.BLAT.CD,122
8,DT.DIS.OFFT.CD,122
9,DT.AMT.BLAT.CD,122


In [49]:
# Top 10 max debt by country:
# takes the single largest debt value recorded for each country_name and
# shows the 10 highest of those maxima, largest first.
sql_query_string = """
SELECT
country_name,
MAX(debt) AS maximum_debt
FROM international_debt
GROUP BY country_name
ORDER BY maximum_debt DESC
LIMIT 10
"""

# Run the query and display the resulting dataframe
result_df = sql_query_to_pd(sql_query_string, db_name='default.db')
result_df

Unnamed: 0,country_name,maximum_debt
0,China,96218620000.0
1,Brazil,90041840000.0
2,Russian Federation,66589760000.0
3,Turkey,51555030000.0
4,South Asia,48756300000.0
5,Least developed countries: UN classification,40160770000.0
6,IDA only,34531190000.0
7,India,31923510000.0
8,Indonesia,30916110000.0
9,Kazakhstan,27482090000.0
