## Transforming Data for Mongo DB

This notebook reads previously cleaned CSV files into pandas dataframes, converts them to JSON records, and loads them into MongoDB.

* cities - information on the income and poverty levels by city with associated lat and long
* states - information at the state level on income and state laws
* guns - information on gun violence incidents
---


In [1]:
import pandas as pd 
import os
import numpy as np

import pymongo
import json

import requests


# function to save dataframe to collection_name in MongoDB 'guns'
def saveMongo(df, collection_name, replace=False):
    """Write the rows of a dataframe to a collection in the local 'guns' database.

    The dataframe is serialized to JSON records (one document per row) and
    inserted into ``collection_name`` on the MongoDB server at localhost:27017.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to store; each row becomes one document.
    collection_name : str
        Name of the target collection inside the 'guns' database.
    replace : bool, default False
        When True, the existing collection is dropped before inserting, so the
        collection ends up containing only the rows of ``df``. When False, the
        rows are appended to whatever is already stored.

    Returns
    -------
    None
    """
    # Serialize first: if the conversion fails, nothing has been dropped yet.
    records = json.loads(df.to_json(orient='records', date_unit='ns'))

    mng_client = pymongo.MongoClient('localhost', 27017)
    try:
        db_cm = mng_client['guns'][collection_name]
        if replace:
            db_cm.drop()
        # insert_many rejects an empty document list, so skip it for empty frames.
        if records:
            db_cm.insert_many(records)
    finally:
        # Release the connection pool; the original left one client open per call.
        mng_client.close()
    


## Load csv files into pandas dataframes, clean, save to mongo db

In [2]:
# Cities: read the previously cleaned cities CSV (path is relative to this notebook)
cities_path = os.path.join("..", "Data", "Cities.csv")
df_cities = pd.read_csv(cities_path, encoding="UTF-8")

# Preview call; it is not the cell's last expression, so the notebook does not render it
df_cities.head()

# Save to / replace the "cities" collection in the "guns" Mongo database
saveMongo(df_cities, collection_name="cities", replace=True)


In [3]:
# States: read the previously cleaned states CSV (path is relative to this notebook)
states_path = os.path.join("..", "Data", "States.csv")

# Only these columns are kept, in this order, for the "states" collection
state_columns = [
    "state",
    "census_2010",
    "pop_estimate_2015",
    "2015_median_income",
    "age18longgunpossess",
    "age21longgunpossess",
    "assault",
    "mentalhealth",
    "universal",
]
df_states = pd.read_csv(states_path, encoding="UTF-8")[state_columns]

# Preview call; it is not the cell's last expression, so the notebook does not render it
df_states.head()

# Save to / replace the "states" collection in the "guns" Mongo database
saveMongo(df_states, collection_name="states", replace=True)

In [4]:
# Gun violence: read the shootings CSV (path is relative to this notebook)
guns_path = os.path.join("..", "Data", "Shootings_2015.csv")

# Columns kept from the raw file, in this order
guns_columns = [
    "incident_id", "date", "state", "city_or_county",
    "n_killed", "n_injured", "incident_characteristics",
    "latitude", "longitude",
    "mass", "gang", "domestic", "non-shooting",
    "accidental", "prohibited", "officer",
]

# Derived columns:
#   n_involved - killed plus injured
#   year       - first four characters of the date string
#                (assumes dates start with a 4-digit year - TODO confirm)
df_guns = (
    pd.read_csv(guns_path, encoding="UTF-8")[guns_columns]
    .assign(
        n_involved=lambda frame: frame["n_killed"] + frame["n_injured"],
        year=lambda frame: frame["date"].str[:4],
    )
)

# Classify each incident. np.select takes the FIRST matching condition, so the
# order matters: mass flag first, then nobody hurt, then nobody killed;
# everything else falls through to the default label.
conditions = [
    df_guns["mass"].eq(1),
    df_guns["n_involved"].eq(0),
    df_guns["n_killed"].eq(0),
]
choices = ["mass shooting", "no injuries", "injuries only"]
df_guns["shoot_type"] = np.select(conditions, choices, default="some dead")

# Preview call; it is not the cell's last expression, so the notebook does not render it
df_guns.head()

# Attach the state-level columns to every incident for filtering purposes;
# a left join keeps incidents whose state has no match in df_states
df_guns_complete = df_guns.merge(df_states, on="state", how="left")
df_guns_complete.head()

# Save to / replace the "guns" collection in the "guns" Mongo database
saveMongo(df_guns_complete, collection_name="guns", replace=True)


FileNotFoundError: File b'../Data/gun_violence_2014-2017.csv' does not exist