## Impressions Data Retrieval
---
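In this Jupyter notebook, we load the Impressions data (Impressions, CPC, CTR, and Cost) for each city, normalize its `CityState` key so it can be joined to the City Census data, and store the result as a CSV file, in the local SQLite database, and in Cloud SQL.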

In [1]:
# Dependencies
import os
import sqlite3
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymysql
import requests
from census import Census
from sqlalchemy import create_engine
from us import states

pymysql.install_as_MySQLdb()

# Census API client (2015 data).
# The key is read from the CENSUS_API_KEY environment variable so that it is not
# committed with the notebook; a missing variable fails fast with a KeyError.
# NOTE(review): a key was previously hardcoded in this cell and should be treated
# as compromised — rotate it.
c = Census(os.environ["CENSUS_API_KEY"], year=2015)

### Load the City Census Data

In [2]:
# Open the project's SQLite database and pull the whole City_Census table into a
# DataFrame. The connection stays open because later cells write to and query it.
conn = sqlite3.connect("Opportunity_Map.db")
city_census = pd.read_sql(sql="select * from City_Census", con=conn)
city_census.head(n=5)

Unnamed: 0,CityState,city,state,Population,White Population,Black Population,Native American Population,Asian Population,Hispanic Population,Education None,...,Employment Female Computer Engineering,Median Age,Median Male Age,Median Female Age,Household Income,Income Per Capita,Median Gross Rent,Median Home Value,lat,lng
0,"AARONSBURG, PA",AARONSBURG,PA,1058.0,1058.0,0.0,0.0,0.0,0.0,23.0,...,4.0,41.5,43.8,38.9,53000.0,21407.0,642.0,170100.0,40.895701,-77.392432
1,"ABBEVILLE, AL",ABBEVILLE,AL,6470.0,4064.0,2274.0,0.0,0.0,96.0,66.0,...,7.0,50.1,45.3,52.4,33944.0,20104.0,516.0,78100.0,31.595148,-85.208852
2,"ABBEVILLE, GA",ABBEVILLE,GA,4719.0,2676.0,1903.0,0.0,41.0,89.0,55.0,...,0.0,39.3,38.2,45.8,29200.0,10071.0,435.0,58400.0,31.976256,-83.339665
3,"ABBEVILLE, LA",ABBEVILLE,LA,26078.0,17932.0,6457.0,160.0,1059.0,710.0,308.0,...,71.0,35.9,34.9,37.4,42909.0,21520.0,613.0,95800.0,29.894612,-92.193173
4,"ABBEVILLE, MS",ABBEVILLE,MS,2974.0,1432.0,1502.0,0.0,19.0,16.0,20.0,...,2.0,37.6,29.9,46.0,61563.0,26266.0,490.0,70200.0,34.492325,-89.443056


### Load the Impressions Data

In [3]:
# Read the raw impressions spreadsheet into a DataFrame using pandas' defaults
impressions_source = "Raw/Impressions_Data.xlsx"
impressions_pd = pd.read_excel(impressions_source)

In [4]:
# Preview the first five rows of the raw impressions data
impressions_pd.head(n=5)

Unnamed: 0,CityState,Impressions,CPC,CTR,Cost
0,"New York, NY",3292.1,14.77,0.032,1546.59
1,"Los Angeles, CA",1291.74,20.39,0.028,721.64
2,"San Diego, CA",1023.23,19.63,0.022,428.26
3,"San Francisco, CA",1017.5,20.14,0.023,464.12
4,"Chicago, IL",972.37,14.76,0.034,486.44


### Clean the Impressions Key

In [5]:
# Upper-case the join key so it matches the all-caps CityState values in City_Census
city_state_upper = impressions_pd["CityState"].str.upper()
impressions_pd["CityState"] = city_state_upper

In [6]:
# Confirm the key column is now upper-case
impressions_pd["CityState"].head(n=5)

0         NEW YORK, NY
1      LOS ANGELES, CA
2        SAN DIEGO, CA
3    SAN FRANCISCO, CA
4          CHICAGO, IL
Name: CityState, dtype: object

### Attempt Merge (for Testing)

In [7]:
# Trial join: keep only the impressions rows whose CityState also appears in the census table
census_impressions_merge = impressions_pd.merge(city_census, how="inner", on="CityState")

In [8]:
# Non-null values per column in the impressions data (baseline for the merge check)
impressions_pd.count(axis="index")

CityState      7860
Impressions    7859
CPC            7859
CTR            7859
Cost           7859
dtype: int64

In [9]:
# Non-null values per column in the census data
city_census.count(axis="index")

CityState                                 27632
city                                      27632
state                                     27632
Population                                27632
White Population                          27632
Black Population                          27632
Native American Population                27632
Asian Population                          27632
Hispanic Population                       27632
Education None                            27632
Education High School                     27632
Education GED                             27632
Education Associates                      27632
Education Bachelors                       27632
Education Masters                         27632
Education Professional                    27632
Education Doctorate                       27632
Poverty                                   27632
Employment Labor Force                    27632
Employment Unemployed                     27632
Employment Male Computer Engineering    

In [10]:
# Non-null values per column after the inner join; compare against the impressions
# counts to see how many impressions rows found a census match
census_impressions_merge.count(axis="index")

CityState                                 7028
Impressions                               7028
CPC                                       7028
CTR                                       7028
Cost                                      7028
city                                      7028
state                                     7028
Population                                7028
White Population                          7028
Black Population                          7028
Native American Population                7028
Asian Population                          7028
Hispanic Population                       7028
Education None                            7028
Education High School                     7028
Education GED                             7028
Education Associates                      7028
Education Bachelors                       7028
Education Masters                         7028
Education Professional                    7028
Education Doctorate                       7028
Poverty      

### Store the Impressions Data

In [11]:
# Write the cleaned impressions to CSV; the DataFrame index is written as the
# first column (pandas' default, spelled out here)
impressions_pd.to_csv(path_or_buf="Outputs/04-Impressions.csv", index=True)

In [12]:
# Save the impressions as the "Impressions" table in SQLite, replacing any existing
# table of that name; the DataFrame index is not stored
impressions_pd.to_sql(name="Impressions", con=conn, if_exists="replace", index=False)

In [13]:
# List the tables in the SQLite database, one name per line, ordered by name
table_query = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;"
cur = conn.cursor()
res = cur.execute(table_query)
for (table_name,) in res.fetchall():
    print(table_name)

City_Census
FullDistances
Impressions
Minimum_Distances
Rules_Added
Zip_Census


### Export to Cloud SQL

In [48]:
# Export the Impressions table to the Cloud SQL (MySQL) database.
#
# The full SQLAlchemy URL, credentials included, is read from the
# OPPORTUNITY_DB_URL environment variable instead of being committed in the
# notebook; a missing variable fails fast with a KeyError. Expected shape:
#   mysql+mysqldb://<user>:<password>@<host>/mapping_data?unix_socket=/cloudsql/<instance>
# NOTE(review): the credentials that used to be embedded in this cell should be
# treated as compromised — rotate them.
engine = create_engine(os.environ["OPPORTUNITY_DB_URL"])

# engine.begin() commits on success, rolls back on error, and always releases the
# connection. A separate name is used so the SQLite `conn` opened earlier in the
# notebook is not shadowed, which keeps the earlier cells re-runnable.
with engine.begin() as cloud_conn:
    impressions_pd.to_sql("Impressions", cloud_conn, if_exists="replace", index=False)