# Drought vs election Data USA

In [7]:
import pandas as pd
# Read the U.S. Drought Monitor CSV (the file name indicates 2000-2016) into a DataFrame.
df_drought_usa = pd.read_csv(
    "Data/United_States_Drought_Monitor__2000-2016.csv"
)

# Formatting drought data

In [8]:
# Show the drought frame; pandas abbreviates the preview to the first and last rows.
df_drought_usa

Unnamed: 0,year,month,day,statefips,countyfips,value
0,2000,1,4,2,2013,9
1,2000,1,11,2,2013,9
2,2000,1,18,2,2013,9
3,2000,1,25,2,2013,9
4,2000,2,1,2,2013,9
...,...,...,...,...,...,...
2786062,2016,11,29,56,56045,2
2786063,2016,12,6,56,56045,2
2786064,2016,12,13,56,56045,2
2786065,2016,12,20,56,56045,2


In [53]:
# countyfips: left-pad with zeros to a width of 5 so that 4-digit codes get a
# leading zero. The column holds strings after this step.
df_drought_usa.countyfips = (
    df_drought_usa.countyfips
    .astype(str)
    .str.rjust(width=5, fillchar="0")
)
df_drought_usa.countyfips.head()

AttributeError: 'DataFrame' object has no attribute 'countyfips'

In [45]:
# Print the column names and dtypes of the drought frame.
df_drought_usa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2786067 entries, 0 to 2786066
Data columns (total 6 columns):
 #   Column      Dtype 
---  ------      ----- 
 0   year        int64 
 1   month       int64 
 2   day         int64 
 3   statefips   int64 
 4   countyfips  object
 5   value       int64 
dtypes: int64(5), object(1)
memory usage: 127.5+ MB


In [46]:
# Cast countyfips to integers. Any zero-padded string such as "02013" becomes
# the plain number 2013, i.e. leading zeros are not kept.
df_drought_usa.countyfips = (
    df_drought_usa.countyfips.astype(int)
)

In [47]:
# Print the schema again to check the countyfips dtype after the integer cast.
df_drought_usa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2786067 entries, 0 to 2786066
Data columns (total 6 columns):
 #   Column      Dtype
---  ------      -----
 0   year        int64
 1   month       int64
 2   day         int64
 3   statefips   int64
 4   countyfips  int32
 5   value       int64
dtypes: int32(1), int64(5)
memory usage: 116.9 MB


In [49]:
# Rename the two FIPS columns in place. 'County FIPS 5' is the same column name
# that appears in the FIPS reference file loaded further down.
df_drought_usa.rename(
    columns={
        "statefips": "State FIPS",
        "countyfips": "County FIPS 5",
    },
    inplace=True,
)

In [50]:
# Print the schema again to check the renamed FIPS columns.
df_drought_usa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2786067 entries, 0 to 2786066
Data columns (total 6 columns):
 #   Column         Dtype
---  ------         -----
 0   year           int64
 1   month          int64
 2   day            int64
 3   State FIPS     int64
 4   County FIPS 5  int32
 5   value          int64
dtypes: int32(1), int64(5)
memory usage: 116.9 MB


In [51]:
# Write the formatted drought frame to CSV, leaving out the row index.
# NOTE(review): this file is written to the working directory, while the
# election export is written under Data/ - confirm the intended location.
df_drought_usa.to_csv("shark2_drought_data_usa.csv", index=False)

# Formatting data of the presidential election

In [14]:
# Read the county-level presidential results (the file name indicates 2000-2020).
df_county_presidential = pd.read_csv(
    "Data/countypres_2000-2020_county.csv"
)

In [15]:
# Print column names, non-null counts and dtypes of the election frame.
df_county_presidential.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72617 entries, 0 to 72616
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   year            72617 non-null  int64  
 1   state           72617 non-null  object 
 2   state_po        72617 non-null  object 
 3   county_name     72617 non-null  object 
 4   county_fips     72560 non-null  float64
 5   office          72617 non-null  object 
 6   candidate       72617 non-null  object 
 7   party           72617 non-null  object 
 8   candidatevotes  72617 non-null  int64  
 9   totalvotes      72617 non-null  int64  
 10  version         72617 non-null  int64  
 11  mode            72617 non-null  object 
dtypes: float64(1), int64(4), object(7)
memory usage: 6.6+ MB


In [52]:
# county_fips: preview the codes as 5-character strings, adding a leading zero
# when there are only 4 digits. The result is displayed only; the column itself
# is not modified by this cell.
#
# county_fips is read as float64 and contains missing values. Converting the
# whole column with astype(str) would turn NaN into the text 'nan', which then
# gets padded to the bogus code '00nan'. To avoid that, only rows that actually
# have a code are converted (float -> int drops the '.0', then str + zfill), and
# the result is re-aligned to the full index so rows without a code stay NaN.
(
    df_county_presidential.county_fips
    .dropna()
    .astype(int)
    .astype(str)
    .str.zfill(5)
    .reindex(df_county_presidential.index)
)

0        01001
1        01001
2        01001
3        01001
4        01003
         ...  
72612    56043
72613    56045
72614    56045
72615    56045
72616    56045
Name: county_fips, Length: 72617, dtype: object

In [56]:
# A direct cast to int fails while the column still contains NA values, so the
# missing codes are first replaced by 0. Rows without a FIPS code therefore
# carry the placeholder code 0 from here on.
df_county_presidential["county_fips"] = (
    df_county_presidential["county_fips"].fillna(0)
)

In [57]:
# Cast county_fips to integers.
df_county_presidential.county_fips = (
    df_county_presidential.county_fips.astype(int)
)

In [58]:
# Rename county_fips to 'County FIPS 5' in place, the same column name the
# drought frame uses for its county code.
df_county_presidential.rename(
    columns={"county_fips": "County FIPS 5"},
    inplace=True,
)

In [59]:
# Write the formatted election frame to CSV under Data/, leaving out the row index.
df_county_presidential.to_csv(
    "Data/shark2_county_presidential_usa.csv",
    index=False,
)

# Formatting FIPS codes

In [34]:
# Example data showing how FIPS codes are formatted in Tableau.
# The file is semicolon-separated.
df_fips_5 = pd.read_csv(
    "Data/NY FIPS 5 (FIPS Codes)_Migrated Data.csv",
    sep=";",
)

In [35]:
# Print column names, non-null counts and dtypes of the FIPS example frame.
df_fips_5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   County             62 non-null     object
 1   County FIPS 5      62 non-null     int64 
 2   State              62 non-null     object
 3   Number of Records  62 non-null     int64 
dtypes: int64(2), object(2)
memory usage: 2.1+ KB


In [37]:
# Show the first three rows of the FIPS example frame.
df_fips_5.head(3)

Unnamed: 0,County,County FIPS 5,State,Number of Records
0,Albany,36001,New York,1
1,Allegany,36003,New York,1
2,Bronx,36005,New York,1


In [38]:
# Print the drought frame's schema again.
# NOTE(review): presumably to compare its FIPS column dtypes with the example file above - confirm.
df_drought_usa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2786067 entries, 0 to 2786066
Data columns (total 6 columns):
 #   Column      Dtype
---  ------      -----
 0   year        int64
 1   month       int64
 2   day         int64
 3   statefips   int64
 4   countyfips  int32
 5   value       int64
dtypes: int32(1), int64(5)
memory usage: 116.9 MB
