In [1]:
#Imports
import pandas as pd
import pyodbc

In [2]:
# Watermark
print('Nathan Young\nJunior Data Analyst\nCenter for the Study of Free Enterprise')
%load_ext watermark
%watermark -a "Western Carolina University" -u -d -p pandas

Nathan Young
Junior Data Analyst
Center for the Study of Free Enterprise
Western Carolina University 
last updated: 2020-01-28 

pandas 0.25.3


In [3]:
# Create backups
#df_backup = pd.read_csv('./Updates/STG_ZLLW_County_MedianValuePerSqft_AllHomes.txt')
#df_backup.to_csv('./Backups/STG_ZLLW_County_MedianValuePerSqft_AllHomes_BACKUP.txt')

In [4]:
#Load Land data
df_mvsf = pd.read_csv('http://files.zillowstatic.com/research/public/County/County_MedianValuePerSqft_AllHomes.csv', 
                      encoding='ISO-8859-1')

#Display table to ensure data loaded correctly
df_mvsf.head()

Unnamed: 0,RegionID,RegionName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,SizeRank,1996-04,1996-05,1996-06,...,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
0,3101,Los Angeles County,CA,Los Angeles-Long Beach-Anaheim,6,37,1,1019.0,1019.0,1019.0,...,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020
1,139,Cook County,IL,Chicago-Naperville-Elgin,17,31,2,463.0,463.0,463.0,...,464,464,464,464,464,464,464,464,464,464
2,1090,Harris County,TX,Houston-The Woodlands-Sugar Land,48,201,3,166.0,166.0,166.0,...,166,166,166,166,166,166,166,166,166,166
3,2402,Maricopa County,AZ,Phoenix-Mesa-Scottsdale,4,13,4,454.0,454.0,454.0,...,454,454,454,454,454,454,454,454,454,454
4,2841,San Diego County,CA,San Diego-Carlsbad,6,73,5,897.0,897.0,897.0,...,898,898,898,898,898,898,898,898,898,898


In [5]:
#Filter data to NC
filter1 = df_mvsf['State'] == "NC"
df_mvsf_nc = df_mvsf[filter1]

#Check to ensure filter worked
df_mvsf_nc.head(5)

Unnamed: 0,RegionID,RegionName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,SizeRank,1996-04,1996-05,1996-06,...,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
48,3102,Mecklenburg County,NC,Charlotte-Concord-Gastonia,37,119,49,191.0,191.0,191.0,...,191,191,191,191,191,191,191,191,191,191
55,324,Wake County,NC,Raleigh,37,183,56,221.0,221.0,221.0,...,221,221,221,221,221,221,221,221,221,221
134,2312,Guilford County,NC,Greensboro-High Point,37,81,135,144.0,144.0,144.0,...,144,144,144,144,144,144,144,144,144,144
183,1712,Forsyth County,NC,Winston-Salem,37,67,184,190.0,190.0,190.0,...,190,190,190,190,190,190,190,190,190,190
198,2912,Cumberland County,NC,Fayetteville,37,51,199,148.0,148.0,148.0,...,148,148,148,148,148,148,148,148,148,149


In [6]:
#View data types of dataframe
df_mvsf_nc.dtypes

RegionID          int64
RegionName       object
State            object
Metro            object
StateCodeFIPS     int64
                  ...  
2019-08           int64
2019-09           int64
2019-10           int64
2019-11           int64
2019-12           int64
Length: 292, dtype: object

In [7]:
#Change MunicipalCodeFIPS dtype to add leading 0's
df_mvsf_nc.loc[ :, 'MunicipalCodeFIPS'] = df_mvsf_nc['MunicipalCodeFIPS'].astype(str)
df_mvsf_nc.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


RegionID          int64
RegionName       object
State            object
Metro            object
StateCodeFIPS     int64
                  ...  
2019-08           int64
2019-09           int64
2019-10           int64
2019-11           int64
2019-12           int64
Length: 292, dtype: object

In [8]:
#Add leading 0's and check to ensure they were added
df_mvsf_nc.loc[ :, 'MunicipalCodeFIPS'] = df_mvsf_nc['MunicipalCodeFIPS'].str.zfill(3)
df_mvsf_nc.head(5)

Unnamed: 0,RegionID,RegionName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,SizeRank,1996-04,1996-05,1996-06,...,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
48,3102,Mecklenburg County,NC,Charlotte-Concord-Gastonia,37,119,49,191.0,191.0,191.0,...,191,191,191,191,191,191,191,191,191,191
55,324,Wake County,NC,Raleigh,37,183,56,221.0,221.0,221.0,...,221,221,221,221,221,221,221,221,221,221
134,2312,Guilford County,NC,Greensboro-High Point,37,81,135,144.0,144.0,144.0,...,144,144,144,144,144,144,144,144,144,144
183,1712,Forsyth County,NC,Winston-Salem,37,67,184,190.0,190.0,190.0,...,190,190,190,190,190,190,190,190,190,190
198,2912,Cumberland County,NC,Fayetteville,37,51,199,148.0,148.0,148.0,...,148,148,148,148,148,148,148,148,148,149


In [9]:
# Set Index to Region Name
df_mvsf_nc.set_index(df_mvsf_nc['RegionName'], inplace = True)
df_mvsf_nc

Unnamed: 0_level_0,RegionID,RegionName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,SizeRank,1996-04,1996-05,1996-06,...,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
RegionName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Mecklenburg County,3102,Mecklenburg County,NC,Charlotte-Concord-Gastonia,37,119,49,191.0,191.0,191.0,...,191,191,191,191,191,191,191,191,191,191
Wake County,324,Wake County,NC,Raleigh,37,183,56,221.0,221.0,221.0,...,221,221,221,221,221,221,221,221,221,221
Guilford County,2312,Guilford County,NC,Greensboro-High Point,37,081,135,144.0,144.0,144.0,...,144,144,144,144,144,144,144,144,144,144
Forsyth County,1712,Forsyth County,NC,Winston-Salem,37,067,184,190.0,190.0,190.0,...,190,190,190,190,190,190,190,190,190,190
Cumberland County,2912,Cumberland County,NC,Fayetteville,37,051,199,148.0,148.0,148.0,...,148,148,148,148,148,148,148,148,148,149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Jones County,575,Jones County,NC,New Bern,37,103,2403,,,,...,67,67,67,67,67,67,67,67,67,67
Camden County,910,Camden County,NC,Elizabeth City,37,029,2419,,,,...,554,554,554,554,554,554,554,554,554,554
Graham County,1049,Graham County,NC,,37,075,2503,,,,...,78,78,78,78,78,78,78,78,78,78
Hyde County,186,Hyde County,NC,,37,095,2719,,,,...,71,71,71,71,71,71,71,71,71,71


In [10]:
# Drop Region Name column
df_mvsf_nc.drop('RegionName', axis = 1, inplace = True)
df_mvsf_nc

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0_level_0,RegionID,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,SizeRank,1996-04,1996-05,1996-06,1996-07,...,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12
RegionName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Mecklenburg County,3102,NC,Charlotte-Concord-Gastonia,37,119,49,191.0,191.0,191.0,191.0,...,191,191,191,191,191,191,191,191,191,191
Wake County,324,NC,Raleigh,37,183,56,221.0,221.0,221.0,221.0,...,221,221,221,221,221,221,221,221,221,221
Guilford County,2312,NC,Greensboro-High Point,37,081,135,144.0,144.0,144.0,144.0,...,144,144,144,144,144,144,144,144,144,144
Forsyth County,1712,NC,Winston-Salem,37,067,184,190.0,190.0,190.0,190.0,...,190,190,190,190,190,190,190,190,190,190
Cumberland County,2912,NC,Fayetteville,37,051,199,148.0,148.0,148.0,148.0,...,148,148,148,148,148,148,148,148,148,149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Jones County,575,NC,New Bern,37,103,2403,,,,,...,67,67,67,67,67,67,67,67,67,67
Camden County,910,NC,Elizabeth City,37,029,2419,,,,,...,554,554,554,554,554,554,554,554,554,554
Graham County,1049,NC,,37,075,2503,,,,,...,78,78,78,78,78,78,78,78,78,78
Hyde County,186,NC,,37,095,2719,,,,,...,71,71,71,71,71,71,71,71,71,71


In [11]:
#Save to csv file for export in Excel
#df_mvsf_nc.to_csv('./Updates/STG_ZLLW_County_MedianValuePerSqft_AllHomes.txt', sep = '\t')

In [12]:
#Connect to database and create cursor
conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=TITANIUM-BOOK;'
                      'Database=DataDashboard;'
                      'Trusted_Connection=yes;')

c = conn.cursor()

In [13]:
#Verify data is in database
c.execute('select top(1) * from dbo.STG_ZLLW_County_MedianValuePerSqft_AllHomes')
for row in c:
    print(row)

ProgrammingError: ('42S02', "[42S02] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid object name 'dbo.STG_ZLLW_County_MedianValuePerSqft_AllHomes'. (208) (SQLExecDirectW)")

In [None]:
#Drop old backup table
#c.execute('drop table STG_ZLLW_County_MedianValuePerSqft_AllHomes_BACKUP')

In [None]:
#Create new backup
c.execute("sp_rename 'dbo.STG_ZLLW_County_MedianValuePerSqft_AllHomes','STG_ZLLW_County_MedianValuePerSqft_AllHomes_BACKUP';")
conn.commit()

In [None]:
#Verify backups are created
c.execute('select top(1) * from dbo.STG_ZLLW_County_MedianValuePerSqft_AllHomes_BACKUP')
for row in c:
    print(row)

In [None]:
conn.close()