In [1]:
from flask import Flask, render_template, jsonify, redirect
import pymongo
#from flask_pymongo import PyMongo
from pymongo import MongoClient
import numpy as np
import pandas as pd
import datetime as dt
import pandas as pd

# Reflect Tables into SQLAlchemy ORM

In [2]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect
import sqlite3

# Connecting to the relational database
## Source: sqlite database from Kaggle Website

In [3]:
# Path to sqlite
database_path = "../Data/wdi_kaggle.sqlite"
engine = create_engine(f"sqlite:///{database_path}")
conn=engine.connect()

#get table names from database
for table_name in inspect(engine).get_table_names():
   print(table_name)

Country
CountryNotes
Footnotes
Indicators
Series
SeriesNotes


## Tables and exporting them to a Pandas DataFrame

In [4]:
Country_df=pd.read_sql('SELECT CountryCode, Region, IncomeGroup FROM Country',conn)
Indicators_df=pd.read_sql('SELECT * FROM Indicators',conn)
Series_df=pd.read_sql('SELECT SeriesCode, Topic, LongDefinition, AggregationMethod, LimitationsAndExceptions, Source, StatisticalConceptAndMethodology FROM Series',conn)

#### We realized that there are two codes (IndicatorCode in Indicator table and SeriesCode in Series table). We needed to confirm that these two codes are exactly the same and that there is no difference between them (i.e., diff_Ind_Series is Null), then we merge Series and Indicator tables based on this common column.

In [5]:
#find number of indicator and series codes
Indicators_df["IndicatorCode"].nunique()
Series_df["SeriesCode"].nunique()

1345

In [6]:
#confirm that there are no differences between indicator and series codes from both tables
series = set(Series_df.SeriesCode)
diff_Ind_Series = [x for x in Indicators_df.IndicatorCode if x not in series]
diff_Ind_Series

[]

### Now, we merge three DataFrames

In [7]:
Ind_Country=Indicators_df.merge(Country_df, left_on='CountryCode', right_on='CountryCode')

In [8]:
Ind_Country_Series=Ind_Country.merge(Series_df, left_on='IndicatorCode', right_on='SeriesCode')

####### Other option: Indictors = engine.execute('SELECT * FROM Indicators join Country on Indicators.CountryCode=Country.CountryCode').fetchall()

In [9]:
Ind_Country_Series.drop(['SeriesCode'],axis=1)

Unnamed: 0,CountryName,CountryCode,IndicatorName,IndicatorCode,Year,Value,Region,IncomeGroup,Topic,LongDefinition,AggregationMethod,LimitationsAndExceptions,Source,StatisticalConceptAndMethodology
0,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1960,1.335609e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
1,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1961,1.341644e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
2,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1962,1.348610e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
3,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1963,1.345048e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
4,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1964,1.341035e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
5,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1965,1.335682e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
6,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1966,1.326774e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
7,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1967,1.316725e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
8,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1968,1.292034e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...
9,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1969,1.267538e+02,,,Health: Reproductive health,Adolescent fertility rate is the number of bir...,Weighted average,,"United Nations Population Division, World Popu...",Reproductive health is a state of physical and...


## Move DataFrame to Mongo DB

In [10]:
#change pandas dataframe to dictionary
data=Ind_Country_Series.to_dict("records")

In [11]:
#connect to mongodb database

#app = Flask(__name__)
#mongo = PyMongo(app, uri="mongodb://localhost:27017/WDI")

client = MongoClient('mongodb://localhost:27017/')
dbmongo = client.World_Development_Indicator

In [18]:
fn=0
ln=len(Ind_Country_Series)

In [19]:
ln

5656458

In [20]:
# import to Mongo DB in chunks 

Ind_Country_Series_section=Ind_Country_Series[fn:ln]
nc=100

def chunk(df,x):
    return [ df[i::x] for i in range(x) ]
 
chunks = chunk(Ind_Country_Series_section, nc)

In [21]:
chunks

[                                            CountryName CountryCode  \
 0                                            Arab World         ARB   
 100                              Caribbean small states         CSS   
 200             East Asia & Pacific (all income levels)         EAS   
 300                                           Euro area         EMU   
 400             Europe & Central Asia (developing only)         ECA   
 500            Fragile and conflict affected situations         FCS   
 600              Heavily indebted poor countries (HIPC)         HPC   
 700                                High income: nonOECD         NOC   
 800       Latin America & Caribbean (all income levels)         LCN   
 900        Least developed countries: UN classification         LDC   
 1000                                         Low income         LIC   
 1100     Middle East & North Africa (all income levels)         MEA   
 1200       Middle East & North Africa (developing only)        

In [17]:
col=dbmongo['WDI_general']

#b=col.insert_many(chunks[x].to_dict(orient='records') for x in range(nc))
for count,x in enumerate(range(nc)):
    a=chunks[x].to_dict(orient='records') 
    col.insert_many(a)
    print(f"chunk={count}")

chunk=0
chunk=1
chunk=2
chunk=3
chunk=4
chunk=5
chunk=6
chunk=7
chunk=8
chunk=9
chunk=10
chunk=11
chunk=12
chunk=13
chunk=14
chunk=15
chunk=16
chunk=17
chunk=18
chunk=19
chunk=20
chunk=21
chunk=22
chunk=23
chunk=24
chunk=25
chunk=26
chunk=27
chunk=28
chunk=29
chunk=30
chunk=31
chunk=32
chunk=33
chunk=34
chunk=35
chunk=36
chunk=37
chunk=38
chunk=39
chunk=40
chunk=41
chunk=42
chunk=43
chunk=44
chunk=45
chunk=46
chunk=47
chunk=48
chunk=49
chunk=50
chunk=51
chunk=52
chunk=53
chunk=54
chunk=55
chunk=56
chunk=57
chunk=58
chunk=59
chunk=60
chunk=61
chunk=62
chunk=63
chunk=64
chunk=65
chunk=66
chunk=67
chunk=68
chunk=69
chunk=70
chunk=71
chunk=72
chunk=73
chunk=74
chunk=75
chunk=76
chunk=77
chunk=78
chunk=79
chunk=80
chunk=81
chunk=82
chunk=83
chunk=84
chunk=85
chunk=86
chunk=87
chunk=88
chunk=89
chunk=90
chunk=91
chunk=92
chunk=93
chunk=94
chunk=95
chunk=96
chunk=97
chunk=98
chunk=99
