In [None]:
import pandas as pd
from sqlalchemy import create_engine

## Read Raw Data

In [5]:
# Read csv file
df = pd.read_csv("Data/clean_data/clean_data.csv")
df.head()

Unnamed: 0,area,district,victim_age,victim_gender,victim_race,location,datetime
0,N Hollywood,1591,99.0,Male,O,"(34.1371, -118.4062)",2019-07-06 23:55:00
1,Foothill,1677,45.0,Male,W,"(34.2249, -118.3617)",2019-07-06 05:00:00
2,West Valley,1028,25.0,Male,A,"(34.2012, -118.4989)",2019-07-06 11:30:00
3,Northeast,1153,29.0,Male,O,"(34.1081, -118.3078)",2019-07-06 14:15:00
4,Central,192,41.0,Male,O,"(34.0384, -118.2646)",2019-07-06 12:30:00


## Normalize Data 

### 1.  Area Table

In [27]:
# Select Area from Raw Table
df_area = df.copy()
df_area = pd.DataFrame(df_area[['area','district']]).drop_duplicates().reset_index()
df_area

Unnamed: 0,index,area,district
0,0,N Hollywood,1591
1,1,Foothill,1677
2,2,West Valley,1028
3,3,Northeast,1153
4,4,Central,192
5,5,Hollywood,645
6,6,Newton,1351
7,7,N Hollywood,1547
8,8,Topanga,2157
9,9,Rampart,231


### 2. Gender Table

In [24]:
# Select Gender from Raw Table
df_gender = pd.DataFrame(df['victim_gender']).drop_duplicates().reset_index(drop=True)
df_gender

Unnamed: 0,victim_gender
0,Male
1,Female
2,Unknown


### 3. Ethnicity Table

In [22]:
# Select Race from Raw Table
df_race = pd.DataFrame(df['victim_race'])
race_dict = {'H':'Hispanic', 'B':'Black', 'O':'Unknown', 'W':'White', 'X':'Unknown', '-':'Unknown',
             'A':'Asian', 'K':'Asian', 'C':'Asian', 'F':'Asian', 'U':'Pacific Islander',
             'J':'Asian', 'P':'Pacific Islander', 'V':'Asian', 'Z':'Asian',
             'I':'American Indian', 'G':'Pacific Islander', 'S':'Pacific Islander', 'D':'Asian', 'L':'Asian'}

df_race["race_description"] = df_race["victim_race"].map(race_dict)
df_race=df_race.drop_duplicates().reset_index(drop = True)
df_race

Unnamed: 0,victim_race,race_description
0,O,Unknown
1,W,White
2,A,Asian
3,H,Hispanic
4,B,Black
5,X,Unknown
6,C,Asian
7,K,Asian
8,F,Asian
9,V,Asian


### 4. Crash Table

In [30]:
# Select Gender from Raw Table
df_crash = pd.DataFrame(df[['location','datetime']])
df_crash.head()

Unnamed: 0,location,datetime
0,"(34.1371, -118.4062)",2019-07-06 23:55:00
1,"(34.2249, -118.3617)",2019-07-06 05:00:00
2,"(34.2012, -118.4989)",2019-07-06 11:30:00
3,"(34.1081, -118.3078)",2019-07-06 14:15:00
4,"(34.0384, -118.2646)",2019-07-06 12:30:00


### 5. Victim Table

In [32]:
# create victim table
df['victim_age'] = df['victim_age'].apply(int)
df_victim = pd.DataFrame(df[['victim_age']])
df_victim.head()

Unnamed: 0,victim_age
0,99
1,45
2,25
3,29
4,41


## Export CSV

In [33]:
df_victim.to_csv("data/clean_data/Victim.csv",index=False)
df_crash.to_csv("data/clean_data/Crash.csv",index=False)
df_race.to_csv("data/clean_data/Ethnicity.csv",index=False)
df_gender.to_csv("data/clean_data/Gender.csv",index=False)
df_area.to_csv("data/clean_data/Area.csv",index=False)