In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import linregress

In [3]:
#homelessness dataframe creation
#pd.read_csv already returns a DataFrame, so it is not wrapped in pd.DataFrame(...) again
file_to_load = "Homelessness_Count_USA.csv"
homelessness_data = pd.read_csv(file_to_load)
#both names are kept bound to the loaded frame; homelessness_df is the one used below
homelessness_df = homelessness_data

#CoC names used to look up rows in the homeless data.
#Order matters: the values pulled for these names are later paired
#position-by-position with matching_citylist.
counties = [
    "Los Angeles City & County CoC",
    "San Francisco CoC",
    "Las Vegas/Clark County CoC",
    "Metropolitan Denver Homeless Initiative",
    "Kansas City/Independence/Lee's Summit/Jackson County CoC",
    "Cook County CoC",
    "Fulton County Continuum of Care",
    "Miami/Dade County CoC",
    "Philadelphia CoC",
    "Phoenix/Mesa/Maricopa County Regional CoC",
    "Boston CoC",
    "Detroit CoC",
    "Indianapolis CoC",
    "Seattle/King County CoC",
    "Dallas City & County/Irving CoC",
    "Nashville/Davidson County CoC",
    "Albuquerque CoC",
    "Charlotte/Mecklenberg CoC",
    "New York City CoC",
    "Minneapolis/Hennepin County CoC",
]

def rows_for_year(counts, year):
    """Return a copy of the rows of `counts` whose 'Year - Text' equals `year`.

    Parameters
    ----------
    counts : pandas.DataFrame
        Frame that has a 'Year - Text' column.
    year : int
        Year to keep; compared with == against 'Year - Text'.

    Returns
    -------
    pandas.DataFrame
        New frame holding only the matching rows (original row labels kept).
    """
    return counts[counts['Year - Text'] == year].copy()


#new df with just total homeless values
#(boolean mask keeps the wanted rows directly instead of collecting the
# labels of the unwanted rows and dropping them)
is_total_homeless = homelessness_df['Attribute Name'] == 'Total Homeless'
clean_homeless_data = homelessness_df[is_total_homeless].copy()

#drawing out only 2014 values, then indexing them by CoC name for .loc lookups
homeless_2014 = rows_for_year(clean_homeless_data, 2014)
county_clean_homeless_data_2014_df = homeless_2014.set_index('CoC Name')

#drawing out only 2015 values, then indexing them by CoC name for .loc lookups
homeless_2015 = rows_for_year(clean_homeless_data, 2015)
county_clean_homeless_data_2015_df = homeless_2015.set_index('CoC Name')

#print test to make sure the specific year will print
print(county_clean_homeless_data_2014_df.loc['San Francisco CoC','Value'])
print(county_clean_homeless_data_2015_df.loc['San Francisco CoC','Value'])

6408
6775


In [4]:
#collect the 'Value' for every CoC in `counties`, one list per year.
#Each list follows the order of `counties`; a name missing from the
#frame's index raises KeyError, as a direct .loc lookup does.
list_2014_total_homeless = [
    county_clean_homeless_data_2014_df.loc[county_name, 'Value']
    for county_name in counties
]
list_2015_total_homeless = [
    county_clean_homeless_data_2015_df.loc[county_name, 'Value']
    for county_name in counties
]

print(list_2014_total_homeless)
print(list_2015_total_homeless)

[34393, 6408, 7443, 6621, 1931, 1182, 477, 4156, 5738, 5918, 5987, 2755, 1890, 8949, 3514, 2234, 1254, 2014, 67810, 3731]
[41174, 6775, 7509, 5335, 1446, 895, 473, 4152, 5998, 5631, 6492, 2597, 1666, 10122, 3141, 2154, 1287, 2001, 75323, 3215]


In [30]:
#County labels taken from courtney's list, so this data can be merged with
#the weather information on the same key.
#Entries are positional: item i labels the value looked up for counties[i].
matching_citylist = [
    'Los_Angeles_County',
    'San_Francisco_County',
    'Clark_County',
    'Denver_County',
    'Jackson_County',
    'Cook_County',
    'Fulton_County',
    'Miami_Dade_County',
    'Philadelphia_County',
    'Maricopa_County',
    'Suffolk_County',
    'Wayne_County',
    'Marion_County',
    'King_County',
    'Dallas_County',
    'Davidson_County',
    'Bernalillo_County',
    'Mecklenburg_County',
    'New_York_County',
    'Hennepin_County',
]

#one single-column frame per year: the column is named up front and the
#rows are labelled by a 'County' index built from matching_citylist
#(each frame gets its own Index object)
homelessness_2014_df = pd.DataFrame(
    {'Value_2014': list_2014_total_homeless},
    index=pd.Index(matching_citylist, name='County'),
)
homelessness_2015_df = pd.DataFrame(
    {'Value_2015': list_2015_total_homeless},
    index=pd.Index(matching_citylist, name='County'),
)

In [34]:
#write the 2014 totals to disk (County index and header row included),
#then end the cell with the frame so the notebook actually displays it
homelessness_2014_df.to_csv('Homeless_2014.csv', index = True, header = True)
homelessness_2014_df

In [35]:
#write the 2015 totals to disk (County index and header row included),
#then end the cell with the frame so the notebook actually displays it
homelessness_2015_df.to_csv('Homeless_2015.csv', index = True, header = True)
homelessness_2015_df

In [33]:
#inner-join the two yearly frames on their shared 'County' index level,
#giving one row per county with Value_2014 and Value_2015 side by side
merged_homelessness = homelessness_2014_df.merge(
    homelessness_2015_df,
    how = 'inner',
    on = 'County',
)
merged_homelessness

Unnamed: 0_level_0,Value_2014,Value_2015
County,Unnamed: 1_level_1,Unnamed: 2_level_1
Los_Angeles_County,34393,41174
San_Francisco_County,6408,6775
Clark_County,7443,7509
Denver_County,6621,5335
Jackson_County,1931,1446
Cook_County,1182,895
Fulton_County,477,473
Miami_Dade_County,4156,4152
Philadelphia_County,5738,5998
Maricopa_County,5918,5631
