### Importing Dependencies

In [1]:
# Dependencies
import pandas as pd
import numpy as np

### Creating a DataFrame from the CSV file

In [2]:
# Load the raw rent table from rent_raw.csv; the path is relative, so the file
# is looked up in the directory the notebook is run from.
rent_df = pd.read_csv("rent_raw.csv")
# Preview the first rows to check which columns were read.
rent_df.head()

Unnamed: 0,Region Name,Region Type,Data Type,Nov-10,Dec-10,Jan-11,Feb-11,Mar-11,Apr-11,May-11,...,Dec-17,Jan-18,Feb-18,Mar-18,Apr-18,May-18,Jun-18,Jul-18,Aug-18,Sep-18
0,Bayview,neighborhood,All Homes,3183,3179,3078,2917,2706,2571,2480,...,3713,3663,3624,3602,3579,3558,3535,3519,3508,3502
1,Bernal Heights,neighborhood,All Homes,3155,3146,3110,3013,2882,2798,2763,...,4490,4470,4457,4448,4444,4440,4443,4451,4459,4465
2,Buena Vista,neighborhood,All Homes,3551,3604,3636,3601,3514,3413,3362,...,5207,5155,5139,5158,5187,5203,5201,5198,5203,5212
3,Corona Heights,neighborhood,All Homes,3504,3544,3551,3500,3406,3316,3261,...,5082,5070,5057,5060,5069,5082,5086,5087,5087,5083
4,Cow Hollow,neighborhood,All Homes,4243,4401,4438,4412,4324,4319,4370,...,6050,6061,6052,6034,6009,5979,5964,5972,6013,6065


### Cleaning the Data and Creating a New DataFrame

In [3]:
# Remove the "Region Type" and "Data Type" columns; only the region name and the
# monthly rent columns are used in the rest of the notebook.
# errors="ignore" keeps this cell safe to re-run: rent_df is reassigned here, so on
# a second run the two columns are already gone and a plain drop would raise KeyError.
rent_df = rent_df.drop(columns = ["Region Type", "Data Type"], errors = "ignore")

In [4]:
# Rename the "Region Name" column to "Neighborhood"; all other columns keep their names.
rent_df = rent_df.rename(columns = {"Region Name": "Neighborhood"})
# Preview the frame with the new column name.
rent_df.head()

Unnamed: 0,Neighborhood,Nov-10,Dec-10,Jan-11,Feb-11,Mar-11,Apr-11,May-11,Jun-11,Jul-11,...,Dec-17,Jan-18,Feb-18,Mar-18,Apr-18,May-18,Jun-18,Jul-18,Aug-18,Sep-18
0,Bayview,3183,3179,3078,2917,2706,2571,2480,2450,2437,...,3713,3663,3624,3602,3579,3558,3535,3519,3508,3502
1,Bernal Heights,3155,3146,3110,3013,2882,2798,2763,2767,2780,...,4490,4470,4457,4448,4444,4440,4443,4451,4459,4465
2,Buena Vista,3551,3604,3636,3601,3514,3413,3362,3371,3428,...,5207,5155,5139,5158,5187,5203,5201,5198,5203,5212
3,Corona Heights,3504,3544,3551,3500,3406,3316,3261,3265,3307,...,5082,5070,5057,5060,5069,5082,5086,5087,5087,5083
4,Cow Hollow,4243,4401,4438,4412,4324,4319,4370,4442,4495,...,6050,6061,6052,6034,6009,5979,5964,5972,6013,6065


In [5]:
# Keep the "Neighborhood" column as its own Series; it is reused as the label
# column when the yearly totals table is built.
neighborhood = rent_df["Neighborhood"]

In [6]:
# Create a function that returns the sum per row per year

def totals(df, str): # where str is the last two digits of the year
    """Return the per-row sum of the columns that belong to one year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose monthly columns are labelled "<Mon>-<YY>" (e.g. "Jan-11").
    str : str or int
        Last two digits of the year, e.g. "11". The name shadows the builtin
        ``str`` inside this function; it is kept so existing calls keep working.

    Returns
    -------
    pandas.Series
        One total per row of ``df``, indexed like ``df``, summing every column
        whose label ends in "-<YY>".
    """
    import re  # local import: the notebook's import cell does not provide re

    # Match the year only as the trailing "-YY" of a label. A bare "11" pattern
    # would also select any other column whose label merely contains "11"
    # (for example an id column such as "Zip11"). The value is escaped so it is
    # always treated as literal text, never as regex syntax.
    year_pattern = "-" + re.escape("{}".format(str)) + "$"
    year = df.filter(regex = year_pattern, axis = 1) # keep only that year's columns

    return year.sum(axis = 1) # get the sum per row

In [7]:
# Two-digit years for which rent_df holds all 12 monthly columns (2011 through 2017);
# the first and last years in the data (2010 and 2018) are only partly covered.
year_list = list(np.arange(11,18)) # NumPy integers 11..17 (the stop value 18 is excluded)
year_list_str = list(map(str, year_list)) # the same years as two-character strings

year_list_str

['11', '12', '13', '14', '15', '16', '17']

In [8]:
# One Series of per-row yearly totals for each entry of year_list_str, in the same order.
yearly_rate = [totals(rent_df, year) for year in year_list_str]

In [9]:
# Build the yearly totals DataFrame: the neighborhood names first, then one
# column per year. The four-digit labels are derived from year_list_str
# ("11" -> "2011") and paired with yearly_rate by position, so labels and totals
# stay in sync if the year range changes instead of relying on hand-written indices.
year_rent_columns = {"Neighborhood": neighborhood}
year_rent_columns.update(
    ("20" + two_digit_year, yearly_total)
    for two_digit_year, yearly_total in zip(year_list_str, yearly_rate)
)
year_rent = pd.DataFrame(year_rent_columns)

year_rent.head()

Unnamed: 0,Neighborhood,2011,2012,2013,2014,2015,2016,2017
0,Bayview,30723,28821,30433,35338,42870,45681,45747
1,Bernal Heights,34471,35739,38924,43654,53977,54833,53741
2,Buena Vista,42407,45678,49364,53889,61646,65690,61917
3,Corona Heights,41051,44269,48263,52768,61781,64072,59849
4,Cow Hollow,52856,52816,56455,62256,75947,78557,71952


In [10]:
# Write the yearly totals to ../Data/yearly_rent.csv, encoded as UTF-8.
# The path is relative to the directory the notebook is run from.
# NOTE(review): presumably ../Data must already exist — confirm before a fresh run.
# NOTE(review): index is left at its default, so the row index is presumably written
# as an extra unnamed first column — confirm that readers of this file expect it.
year_rent.to_csv("../Data/yearly_rent.csv", encoding = "utf-8")