In [1]:
# Import Dependencies
from pymongo import MongoClient
from config import cloudM, cloudMpassword
import pandas as pd
import numpy as np
import datetime as dt
import csv
import re

In [2]:
# Widen pandas' display limits so long/wide DataFrames render without truncation.
for option_name, option_value in {
    "display.max_rows": 500,
    "display.max_columns": 500,
    "display.width": 1000,
}.items():
    pd.set_option(option_name, option_value)

## Load Demographics DFs

In [3]:
# Read the religion and language CSV extracts into two DataFrames.
relig, lang = (
    pd.read_csv(csv_name)
    for csv_name in ("cia_religions.csv", "cia_languages.csv")
)

In [4]:
# Remove the "_id" column that came in with the CSV
# (presumably the MongoDB document id from the export — confirm).
# errors="ignore" keeps the cell safe to re-run: it reassigns `relig`, so a
# second execution would otherwise raise KeyError on the already-removed column.
relig = relig.drop(columns="_id", errors="ignore")
relig.head()

Unnamed: 0,Country,People and Society: Nationality - noun,Population (as of 2022),Top Religion - Name,Top Religion - Percent of Population
0,Afghanistan,Afghan(s),38346720,Muslim,99.7
1,Albania,Albanian(s),3095344,Muslim,56.7
2,Algeria,Algerian(s),44178884,Muslim,
3,American Samoa,American Samoan(s) (US nationals),45443,Christian,98.3
4,Andorra,Andorran(s),85560,Christian,


In [5]:
# Drop the columns not needed from the language frame: "_id" and the
# nationality noun (the religion frame still carries that column).
# errors="ignore" keeps the cell safe to re-run: it reassigns `lang`, so a
# second execution would otherwise raise KeyError on the already-removed columns.
lang = lang.drop(
    columns=["_id", "People and Society: Nationality - noun"],
    errors="ignore",
)
lang.head()

Unnamed: 0,Country,Population (as of 2022),Top Language
0,Afghanistan,38346720,Afghan
1,Albania,3095344,Albania
2,Algeria,44178884,Arabic
3,American Samoa,45443,Samoan
4,Andorra,85560,Catalan


## Join both Demographics DFs

In [6]:
# Compare the dimensions of the two frames before joining them.
print(
    f"Religion Df's shape is {relig.shape}",
    f"Language Df's shape is {lang.shape}",
    sep="\n",
)

Religion Df's shape is (228, 5)
Language Df's shape is (229, 3)


In [7]:
# Inner-join the language and religion frames. With no explicit `on`, merge
# keys on every column name the two frames share; rows without a match in
# both frames are dropped.
join_df = lang.merge(relig, how="inner")
join_df.head()

Unnamed: 0,Country,Population (as of 2022),Top Language,People and Society: Nationality - noun,Top Religion - Name,Top Religion - Percent of Population
0,Afghanistan,38346720,Afghan,Afghan(s),Muslim,99.7
1,Albania,3095344,Albania,Albanian(s),Muslim,56.7
2,Algeria,44178884,Arabic,Algerian(s),Muslim,
3,American Samoa,45443,Samoan,American Samoan(s) (US nationals),Christian,98.3
4,Andorra,85560,Catalan,Andorran(s),Christian,


In [8]:
# Put the identifying columns first, then language, then the religion details.
column_order = [
    "Country",
    "People and Society: Nationality - noun",
    "Population (as of 2022)",
    "Top Language",
    "Top Religion - Name",
    "Top Religion - Percent of Population",
]
join_df = join_df[column_order]

In [9]:
# Shorten the verbose nationality column header.
join_df = join_df.rename(
    {"People and Society: Nationality - noun": "Nationality - Noun"},
    axis="columns",
)

In [10]:
# Preview the first rows of the joined demographics frame.
join_df.head()

Unnamed: 0,Country,Nationality - Noun,Population (as of 2022),Top Language,Top Religion - Name,Top Religion - Percent of Population
0,Afghanistan,Afghan(s),38346720,Afghan,Muslim,99.7
1,Albania,Albanian(s),3095344,Albania,Muslim,56.7
2,Algeria,Algerian(s),44178884,Arabic,Muslim,
3,American Samoa,American Samoan(s) (US nationals),45443,Samoan,Christian,98.3
4,Andorra,Andorran(s),85560,Catalan,Christian,


## Load Terror DF

In [11]:
# Read the cleaned terror-incident records and preview the first rows.
terror = pd.read_csv(filepath_or_buffer="clean_terror_df.csv")
terror.head(n=5)

Unnamed: 0,CATEGORY,COUNTRY,DATE,DEAD,DESCRIPTION,INJURED,LATITUDE,LONGITUDE,REGION,SUBREGION,YEAR,_id
0,Assault,Indonesia,2022-07-16T00:00:00.000Z,10,Around 20 gunmen attacked the village of Nogol...,2,-4.318185,138.180163,Asia,South-Eastern Asia,2022,630053b3d24d700f0f23d36d
1,Assault,Iran,2022-04-05T00:00:00.000Z,2,A stabbing attack at the Imam Reza shrine kill...,1,36.297494,59.605923,Asia,Southern Asia,2022,630053b3d24d700f0f23d375
2,Explosion,Afghanistan,2022-04-19T00:00:00.000Z,6,Three bombs targeted the Abdul Rahim Shahid Sc...,25,34.526011,69.177684,Asia,Southern Asia,2022,630053b3d24d700f0f23d373
3,"Assault,Hostage",Niger,2020-08-09T00:00:00.000Z,8,Islamic State in the Greater Sahara militants ...,0,17.735621,9.323843,Africa,Western Africa,2020,630053b3d24d700f0f23d396
4,Assault,United Kingdom,2020-06-20T00:00:00.000Z,3,"Shortly before 19:00 BST, 25 year old Libyan b...",3,51.456659,-0.969651,Europe,Northern Europe,2020,630053b3d24d700f0f23d39a


## Join the Demographics DF with Terror DF to show Acts per Country

In [12]:
# Count the terrorist acts per country: group the incident rows by COUNTRY and
# take the non-null count of every other column, then turn COUNTRY back into
# a regular column.
per_country = terror.groupby(by="COUNTRY").count().reset_index()

In [13]:
# Build the two-column "acts per country" table directly from `terror`.
# - Recomputing from `terror` (rather than dropping/renaming columns of the
#   previous `per_country`) keeps this cell safe to re-run and independent of
#   the exact set of columns in the CSV.
# - size() counts rows per country, so an incident with a missing CATEGORY is
#   still counted (count() on CATEGORY would skip it).
per_country = (
    terror.groupby("COUNTRY")
    .size()
    .reset_index(name="Number of Terrorist Acts")
    .rename(columns={"COUNTRY": "Country"})
)

In [14]:
# Preview the per-country act counts.
per_country.head()

Unnamed: 0,Country,Number of Terrorist Acts
0,Afghanistan,453
1,Algeria,56
2,Angola,15
3,Argentina,47
4,Armenia,1


In [15]:
# (rows, columns) of the per-country table, i.e. how many countries have at least one recorded act.
per_country.shape

(122, 2)

In [17]:
# Attach the act counts to the demographics frame. This is an inner join on
# the column names the two frames share, so only countries present in both
# frames are kept.
final_df = join_df.merge(per_country, how="inner")
final_df.head()

Unnamed: 0,Country,Nationality - Noun,Population (as of 2022),Top Language,Top Religion - Name,Top Religion - Percent of Population,Number of Terrorist Acts
0,Afghanistan,Afghan(s),38346720,Afghan,Muslim,99.7,453
1,Algeria,Algerian(s),44178884,Arabic,Muslim,,56
2,Angola,Angolan(s),34795287,Portuguese,Roman Catholic,41.1,15
3,Argentina,Argentine(s),46245668,Spanish,Roman Catholic,62.9,47
4,Armenia,Armenian(s),3000756,Armenia,Armenian Apostolic,9.0,1


## Add to MongoDB

In [18]:
# Build the cloud MongoDB SRV connection string. `cloudM` and `cloudMpassword`
# are already imported from `config` in the notebook's import cell, so they
# are not re-imported here.
# NOTE(review): PyMongo expects the username/password embedded in a URI to be
# percent-escaped — confirm the values in `config` contain no reserved
# characters (or are already escaped).
cloudstr = f"mongodb+srv://{cloudM}:{cloudMpassword}@finalproject.1pamme7.mongodb.net/test"

In [19]:
# Open the cloud connection, then upload the combined demographics +
# act-count frame as one document per DataFrame row.
cloudclient = MongoClient(cloudstr)

db = cloudclient["final_project"]
colmanager = db["demographics_terror_acts_per_country"]
final_records = final_df.to_dict(orient="records")
colmanager.insert_many(final_records)

<pymongo.results.InsertManyResult at 0x7f8dc8187bd0>

In [20]:
# Upload the demographics-only join (religion + language), one document per row.
# The source frame is `join_df`, not `final_df`: this collection holds the
# religion/language data for every joined country, without the act counts.
db = cloudclient['final_project']
colmanager = db["religion_and_language"]
colmanager.insert_many(join_df.to_dict('records'))

<pymongo.results.InsertManyResult at 0x7f8deb397e90>