In [47]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

### Store CSV into DataFrame

In [4]:
csv_file = "monster_com-job_sample.csv"
jobs_df = pd.read_csv(csv_file)
jobs_df.head()


Unnamed: 0,country,country_code,date_added,has_expired,job_board,job_description,job_title,job_type,location,organization,page_url,salary,sector,uniq_id
0,United States of America,US,,No,jobs.monster.com,TeamSoft is seeing an IT Support Specialist to...,IT Support Technician Job in Madison,Full Time Employee,"Madison, WI 53702",,http://jobview.monster.com/it-support-technici...,,IT/Software Development,11d599f229a80023d2f40e7c52cd941e
1,United States of America,US,,No,jobs.monster.com,The Wisconsin State Journal is seeking a flexi...,Business Reporter/Editor Job in Madison,Full Time,"Madison, WI 53708",Printing and Publishing,http://jobview.monster.com/business-reporter-e...,,,e4cbb126dabf22159aff90223243ff2a
2,United States of America,US,,No,jobs.monster.com,Report this job About the Job DePuy Synthes Co...,Johnson & Johnson Family of Companies Job Appl...,"Full Time, Employee",DePuy Synthes Companies is a member of Johnson...,Personal and Household Services,http://jobview.monster.com/senior-training-lea...,,,839106b353877fa3d896ffb9c1fe01c0
3,United States of America,US,,No,jobs.monster.com,Why Join Altec? If you’re considering a career...,Engineer - Quality Job in Dixon,Full Time,"Dixon, CA",Altec Industries,http://jobview.monster.com/engineer-quality-jo...,,Experienced (Non-Manager),58435fcab804439efdcaa7ecca0fd783
4,United States of America,US,,No,jobs.monster.com,Position ID# 76162 # Positions 1 State CT C...,Shift Supervisor - Part-Time Job in Camphill,Full Time Employee,"Camphill, PA",Retail,http://jobview.monster.com/shift-supervisor-pa...,,Project/Program Management,64d0272dc8496abfd9523a8df63c184c


### Create new data with select columns

In [21]:
new_jobs_df = jobs_df[['job_title', 'job_type', 'sector','location']].copy()
new_jobs_df.head()

Unnamed: 0,job_title,job_type,sector,location
0,IT Support Technician Job in Madison,Full Time Employee,IT/Software Development,"Madison, WI 53702"
1,Business Reporter/Editor Job in Madison,Full Time,,"Madison, WI 53708"
2,Johnson & Johnson Family of Companies Job Appl...,"Full Time, Employee",,DePuy Synthes Companies is a member of Johnson...
3,Engineer - Quality Job in Dixon,Full Time,Experienced (Non-Manager),"Dixon, CA"
4,Shift Supervisor - Part-Time Job in Camphill,Full Time Employee,Project/Program Management,"Camphill, PA"


In [23]:
#Renames job types.
new_jobs_df['job_type'] = new_jobs_df['job_type'].map(
    lambda x: x if pd.isnull(x) else 'Full Time' if 'Full' in x else 'Part Time' if 'Part' in x else 'Other'
)

new_jobs_df.head()

Unnamed: 0,job_title,job_type,sector,location
0,IT Support Technician Job in Madison,Full Time,IT/Software Development,"Madison, WI 53702"
1,Business Reporter/Editor Job in Madison,Full Time,,"Madison, WI 53708"
2,Johnson & Johnson Family of Companies Job Appl...,Full Time,,DePuy Synthes Companies is a member of Johnson...
3,Engineer - Quality Job in Dixon,Full Time,Experienced (Non-Manager),"Dixon, CA"
4,Shift Supervisor - Part-Time Job in Camphill,Full Time,Project/Program Management,"Camphill, PA"


In [55]:
new_jobs_df=new_jobs_df[new_jobs_df['location'].str.len() < 30]

location=new_jobs_df['location'].str.split(',')
new_jobs_df['City']=location.str[0]
new_jobs_df['State']=location.str[1].str[0:3]

new_jobs_df.loc[new_jobs_df['State'] != np.nan]


Unnamed: 0,job_title,job_type,sector,location,City,State
0,IT Support Technician Job in Madison,Full Time,IT/Software Development,"Madison, WI 53702",Madison,WI
1,Business Reporter/Editor Job in Madison,Full Time,,"Madison, WI 53708",Madison,WI
3,Engineer - Quality Job in Dixon,Full Time,Experienced (Non-Manager),"Dixon, CA",Dixon,CA
4,Shift Supervisor - Part-Time Job in Camphill,Full Time,Project/Program Management,"Camphill, PA",Camphill,PA
5,Construction PM - Charlottesville Job in Charl...,Full Time,Experienced (Non-Manager),"Charlottesville, VA",Charlottesville,VA
6,CyberCoders Job Application for Principal QA E...,Full Time,,Contact name Tony Zerio,Contact name Tony Zerio,
7,Mailroom Clerk Job in Austin,Full Time,Experienced (Non-Manager),"Austin, TX 73301",Austin,TX
8,Housekeeper Job in Austin,Part Time,Customer Support/Client Care,"Austin, TX 78746",Austin,TX
9,Video Data Management /Transportation Technici...,,,"Chesterfield, MO",Chesterfield,MO
10,Aflac Insurance Sales Agent Job in Berryville,Full Time,Customer Support/Client Care,"Berryville, VA 22611",Berryville,VA


### Store JSON data into a DataFrame

In [4]:
json_file = "../Resources/customer_location.json"
customer_location_df = pd.read_json(json_file)
customer_location_df.head()

Unnamed: 0,address,id,latitude,longitude,us_state
0,043 Mockingbird Place,1,39.1682,-86.5186,Indiana
1,4 Prentice Point,2,41.0938,-85.0707,Indiana
2,46 Derek Junction,3,32.7673,-96.7776,Texas
3,11966 Old Shore Place,4,39.035,-94.3567,Missouri
4,5 Evergreen Circle,5,40.7808,-73.9772,New York


### Clean DataFrame

In [5]:
new_customer_location_df = customer_location_df[["id", "address", "us_state"]].copy()
new_customer_location_df.head()

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York


### Connect to local database

In [6]:
rds_connection_string = "<insert user name>:<insert password>@127.0.0.1/customer_db"
engine = create_engine(f'mysql://{rds_connection_string}')

### Check for tables

In [7]:
engine.table_names()

['customer_location', 'customer_name']

### Use pandas to load csv converted DataFrame into database

In [8]:
new_customer_data_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)

### Use pandas to load json converted DataFrame into database

In [9]:
new_customer_location_df.to_sql(name='customer_location', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [10]:
pd.read_sql_query('select * from customer_name', con=engine).head()

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Confirm data has been added by querying the customer_location table

In [11]:
pd.read_sql_query('select * from customer_location', con=engine).head()

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York
