# Exploration & Cleaning of the raw data

Global Food Price Inflation

In [1]:
import psycopg2
import pandas as pd

# Load the two raw CSV snapshots (dated 2023-10-02) that sit next to the notebook:
# the per-country price/inflation series and the per-country details table.
df_country, df_details = (
    pd.read_csv(csv_name)
    for csv_name in (
        "WLD_RTFP_country_2023-10-02.csv",
        "WLD_RTP_details_2023-10-02.csv",
    )
)

In [2]:
# Show dtypes and non-null counts per column to see where values are missing.
df_country.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4798 entries, 0 to 4797
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       4734 non-null   float64
 1   High       4734 non-null   float64
 2   Low        4734 non-null   float64
 3   Close      4734 non-null   float64
 4   Inflation  4434 non-null   float64
 5   country    4798 non-null   object 
 6   ISO3       4798 non-null   object 
 7   date       4798 non-null   object 
dtypes: float64(5), object(3)
memory usage: 300.0+ KB


In [3]:
# Tag every row with a unique surrogate key ("r0", "r1", ...) built from its
# position; this column becomes the primary key of the inflation_country table.
# That table declares it VARCHAR(5), so the scheme fits up to 10,000 rows ("r9999").
df_country["records"] = [f"r{position}" for position in range(len(df_country))]
df_country.head(3)

Unnamed: 0,Open,High,Low,Close,Inflation,country,ISO3,date,records
0,0.53,0.54,0.53,0.53,,Afghanistan,AFG,2007-01-01,r0
1,0.53,0.54,0.53,0.53,,Afghanistan,AFG,2007-02-01,r1
2,0.54,0.54,0.53,0.53,,Afghanistan,AFG,2007-03-01,r2


In [4]:
# Column order matters: rows are later inserted positionally via list(row).
df_country.columns

Index(['Open', 'High', 'Low', 'Close', 'Inflation', 'country', 'ISO3', 'date',
       'records'],
      dtype='object')

In [5]:
# Column order matters: rows are later inserted positionally via list(row).
df_details.columns

Index(['country', 'iso3', 'components', 'currency', 'start_date_observations',
       'end_date_observations', 'number_of_markets_modeled',
       'number_of_markets_covered', 'number_of_food_items',
       'number_of_observations_food', 'number_of_observations_other',
       'data_coverage_food', 'data_coverage_previous_12_months_food',
       'total_food_price_increase_since_start_date',
       'average_annualized_food_inflation', 'maximum_food_drawdown',
       'average_annualized_food_volatility',
       'average_monthly_food_price_correlation_between_markets',
       'average_annual_food_price_correlation_between_markets',
       'Rsquared_individual_food_items', 'Rsquared_individual_other_items',
       'index_confidence_score', 'imputation_model'],
      dtype='object')

# Creation & Connection to a new DB

In [6]:
def create_database():
    """Recreate the ``accounts`` database and return a cursor and connection to it.

    Connects to the server's default ``postgres`` database, drops ``accounts`` if
    it already exists, creates it afresh, and then opens a second connection to
    the newly created database.

    Connection settings come from the standard libpq environment variables
    (``PGUSER``, ``PGPASSWORD``, ``PGHOST``, ``PGPORT``) so that no credential is
    stored in the notebook. When ``PGPASSWORD`` is unset the password is prompted
    for interactively.

    Returns:
        tuple: ``(cur, conn)`` -- a cursor and the psycopg2 connection it belongs
        to, both bound to ``accounts``. This connection is NOT in autocommit
        mode; callers must call ``conn.commit()`` themselves.

    Raises:
        psycopg2.Error: if connecting, dropping or creating the database fails.

    Warning:
        Destructive -- an existing ``accounts`` database is dropped.
    """
    # Local imports keep this cell self-contained.
    import getpass
    import os

    settings = {
        "user": os.environ.get("PGUSER", "postgres"),
        "password": os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: "),
        "host": os.environ.get("PGHOST", "127.0.0.1"),  # loopback address, i.e. localhost
        "port": int(os.environ.get("PGPORT", "5432")),
    }

    # Maintenance connection to the default database. DROP/CREATE DATABASE cannot
    # run inside a transaction block, hence autocommit.
    admin_conn = psycopg2.connect(database="postgres", **settings)
    try:
        admin_conn.set_session(autocommit=True)
        with admin_conn.cursor() as admin_cur:
            admin_cur.execute("DROP DATABASE IF EXISTS accounts")
            admin_cur.execute("CREATE DATABASE accounts")
    finally:
        # Release the maintenance connection even if one of the statements fails.
        admin_conn.close()

    # Working connection to the freshly created database.
    conn = psycopg2.connect(database="accounts", **settings)
    cur = conn.cursor()

    return cur, conn

# Data Modelling & Create Tables

In [7]:
# CREATE TABLE statements for the two tables loaded from the CSV files.

# inflation_details: one row per country, keyed by `country` (the primary key).
# Column names and order mirror df_details.columns.
# NOTE(review): the date fields and several numeric-looking fields are declared
# as VARCHAR/TEXT -- confirm against the CSV contents before tightening the types.
create_details = """
    CREATE TABLE inflation_details 
    (
        country VARCHAR(50) PRIMARY KEY, 
        iso3 VARCHAR(6),
        components TEXT,
        currency VARCHAR(6),
        start_date_observations VARCHAR(20),
        end_date_observations VARCHAR(20),
        number_of_markets_modeled INT,
        number_of_markets_covered INT,
        number_of_food_items INT,
        number_of_observations_food TEXT,
        number_of_observations_other TEXT,
        data_coverage_food VARCHAR(200),
        data_coverage_previous_12_months_food VARCHAR(200),                    
        total_food_price_increase_since_start_date VARCHAR(200),
        average_annualized_food_inflation VARCHAR(200),
        maximum_food_drawdown VARCHAR(200),
        average_annualized_food_volatility VARCHAR(200),
        average_monthly_food_price_correlation_between_markets VARCHAR(200),
        average_annual_food_price_correlation_between_markets VARCHAR(200),
        Rsquared_individual_food_items TEXT,
        Rsquared_individual_other_items TEXT,
        index_confidence_score NUMERIC,
        imputation_model TEXT
    );
"""

# inflation_country: one row per df_country record, keyed by the synthetic
# `records` id (VARCHAR(5), so ids longer than five characters are rejected).
# `country` is a foreign key into inflation_details, so each country must exist
# there before its rows can be inserted here.
create_country = """
    CREATE TABLE inflation_country 
    (
        Open NUMERIC, 
        High NUMERIC, 
        Low NUMERIC, 
        Close NUMERIC, 
        Inflation  NUMERIC, 
        country VARCHAR (50), 
        ISO3 VARCHAR(10), 
        date VARCHAR(50),
        records VARCHAR(5) PRIMARY KEY,
        FOREIGN KEY (country) REFERENCES inflation_details(country)
    );
"""

# Order matters: inflation_details must be created first because
# inflation_country references it through its foreign key.
create_tables_queries = [create_details, create_country]

In [8]:
# Drop and recreate the `accounts` database, keeping a cursor and connection to it
# for the cells below. Destructive: re-running this cell discards the existing database.
cur, conn =  create_database()

In [9]:
def create_tables(cur, conn):
    """Run every statement in ``create_tables_queries`` as a single transaction.

    Args:
        cur: open psycopg2 cursor on the target database.
        conn: the connection ``cur`` belongs to; it is committed on success.

    Raises:
        Exception: whatever ``cur.execute`` raised. The transaction is rolled
            back first, so no partially created schema is left behind and the
            connection remains usable for a retry.
    """
    try:
        for ddl in create_tables_queries:
            cur.execute(ddl)
        # Commit once so that either all tables exist or none do.
        conn.commit()
    except Exception:
        conn.rollback()
        raise


create_tables(cur, conn)

# Insert data into tables

In [10]:
# Parameterized INSERT for inflation_details: 23 columns and 23 placeholders,
# listed in the same order as df_details.columns so a row can be passed positionally.
insert_inflation_details = """
    INSERT INTO inflation_details ( 
        country, iso3, components, currency, start_date_observations, end_date_observations, number_of_markets_modeled,
        number_of_markets_covered, number_of_food_items, number_of_observations_food, number_of_observations_other,
        data_coverage_food, data_coverage_previous_12_months_food, total_food_price_increase_since_start_date,
        average_annualized_food_inflation, maximum_food_drawdown, average_annualized_food_volatility,
        average_monthly_food_price_correlation_between_markets, average_annual_food_price_correlation_between_markets,
        Rsquared_individual_food_items, Rsquared_individual_other_items, index_confidence_score, imputation_model
        )
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ;
"""

# Parameterized INSERT for inflation_country: 9 columns and 9 placeholders, in the
# same order as df_country.columns (including the added `records` column).
insert_inflation_country = """
    INSERT INTO inflation_country (Open, High, Low, Close, Inflation, country, ISO3, date, records)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    ;
"""

# %s is a positional placeholder: when the query is executed, each %s is replaced
# by the value at the same position in the parameter sequence passed to cur.execute().

In [11]:
# Load both frames into PostgreSQL. inflation_details goes first because
# inflation_country.country is a foreign key that references it.
for insert_query, frame in (
    (insert_inflation_details, df_details),
    (insert_inflation_country, df_country),
):
    # A float NaN would be sent to the server as the value 'NaN' instead of NULL,
    # so missing cells are replaced by None, which psycopg2 adapts to NULL.
    rows = [
        tuple(None if pd.isna(value) else value for value in record)
        for record in frame.itertuples(index=False, name=None)
    ]
    try:
        cur.executemany(insert_query, rows)
        # One commit per table: the table is loaded completely or not at all.
        conn.commit()
    except Exception:
        # Leave the connection usable so the cell can be re-run after a fix.
        conn.rollback()
        raise

# Close Cursor & Connection

In [None]:
# Release the cursor first, then the connection it was created from.
cur.close()
conn.close()