## This file is the data analysis for Austin Rent vs Own Percentage over several years
Data source: https://datausa.io/profile/geo/austin-tx/?race-income-income_geo=incomeRace5

In [1]:
# Import Necessary Libraries
import numpy as np
import pandas as pd

In [2]:
# Load in data sets

# Set names of data set files for local access
path_1 = 'austin_rent_own.csv'

austin_rent_own_df = pd.read_csv(path_1)

### Sanity Checks

#### Data set 1

In [3]:
austin_rent_own_df.shape

(77, 8)

In [4]:
austin_rent_own_df.head(5)

Unnamed: 0,Place ID,Place,Occupied By ID,Occupied By,Year,Household Ownership,Household Ownership Moe,share
0,16000US4805000,"Austin, TX",0,Owner Occupied,2013,152054,1840.0,0.450142
1,16000US4805000,"Austin, TX",0,Owner Occupied,2014,154410,2165.0,0.448489
2,16000US4805000,"Austin, TX",0,Owner Occupied,2015,157382,1931.0,0.448133
3,16000US4805000,"Austin, TX",0,Owner Occupied,2016,162271,2287.0,0.452764
4,16000US4805000,"Austin, TX",0,Owner Occupied,2017,163486,2068.0,0.452548


In [5]:
austin_rent_own_df.tail(5)

Unnamed: 0,Place ID,Place,Occupied By ID,Occupied By,Year,Household Ownership,Household Ownership Moe,share
72,05000US48491,"Williamson County, TX",0,Owner Occupied,2019,123208,1606.0,0.683881
73,05000US48491,"Williamson County, TX",0,Owner Occupied,2020,128240,1713.0,0.681718
74,05000US48491,"Williamson County, TX",0,Owner Occupied,2021,147624,1736.0,0.677858
75,05000US48491,"Williamson County, TX",0,Owner Occupied,2022,155568,2023.0,0.676659
76,05000US48491,"Williamson County, TX",0,Owner Occupied,2023,162175,1941.0,0.670827


In [6]:
austin_rent_own_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 8 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Place ID                 77 non-null     object 
 1   Place                    77 non-null     object 
 2   Occupied By ID           77 non-null     int64  
 3   Occupied By              77 non-null     object 
 4   Year                     77 non-null     int64  
 5   Household Ownership      77 non-null     int64  
 6   Household Ownership Moe  77 non-null     float64
 7   share                    77 non-null     float64
dtypes: float64(2), int64(3), object(3)
memory usage: 4.9+ KB


In [7]:
austin_rent_own_df.describe()

Unnamed: 0,Occupied By ID,Year,Household Ownership,Household Ownership Moe,share
count,77.0,77.0,77.0,77.0,77.0
mean,0.0,2018.0,12079340.0,56336.974026,0.591254
std,0.0,3.183014,26974900.0,122714.472758,0.074892
min,0.0,2013.0,37732.0,749.0,0.444426
25%,0.0,2015.0,147624.0,1736.0,0.529885
50%,0.0,2018.0,239208.0,2630.0,0.620795
75%,0.0,2021.0,5693770.0,28353.0,0.641139
max,0.0,2023.0,82892040.0,377633.0,0.689264


### Remove unecessary columns

In [8]:
# Make a cleaned copy
austin_rent_own_df_cleaned = austin_rent_own_df.drop(
    columns=["Place ID", "Occupied By ID", "Household Ownership Moe"]
).copy()

# Quick check
print(austin_rent_own_df_cleaned.head())
print(austin_rent_own_df_cleaned['Place'].unique())

        Place     Occupied By  Year  Household Ownership     share
0  Austin, TX  Owner Occupied  2013               152054  0.450142
1  Austin, TX  Owner Occupied  2014               154410  0.448489
2  Austin, TX  Owner Occupied  2015               157382  0.448133
3  Austin, TX  Owner Occupied  2016               162271  0.452764
4  Austin, TX  Owner Occupied  2017               163486  0.452548
['Austin, TX' 'United States' 'Texas' 'Austin-Round Rock, TX'
 'Hays County, TX' 'Travis County, TX' 'Williamson County, TX']


### Remove non Austin,TX data points

In [10]:
# Keep only Austin, TX rows
austin_rent_own_df_cleaned = austin_rent_own_df[austin_rent_own_df['Place'] == "Austin, TX"].copy()

# Check the result
print(austin_rent_own_df_cleaned['Place'].unique())
print(austin_rent_own_df_cleaned.head())


['Austin, TX']
         Place ID       Place  Occupied By ID     Occupied By  Year  \
0  16000US4805000  Austin, TX               0  Owner Occupied  2013   
1  16000US4805000  Austin, TX               0  Owner Occupied  2014   
2  16000US4805000  Austin, TX               0  Owner Occupied  2015   
3  16000US4805000  Austin, TX               0  Owner Occupied  2016   
4  16000US4805000  Austin, TX               0  Owner Occupied  2017   

   Household Ownership  Household Ownership Moe     share  
0               152054                   1840.0  0.450142  
1               154410                   2165.0  0.448489  
2               157382                   1931.0  0.448133  
3               162271                   2287.0  0.452764  
4               163486                   2068.0  0.452548  


In [12]:
austin_rent_own_df_cleaned.shape

(11, 8)

In [13]:
austin_rent_own_df_cleaned.head()

Unnamed: 0,Place ID,Place,Occupied By ID,Occupied By,Year,Household Ownership,Household Ownership Moe,share
0,16000US4805000,"Austin, TX",0,Owner Occupied,2013,152054,1840.0,0.450142
1,16000US4805000,"Austin, TX",0,Owner Occupied,2014,154410,2165.0,0.448489
2,16000US4805000,"Austin, TX",0,Owner Occupied,2015,157382,1931.0,0.448133
3,16000US4805000,"Austin, TX",0,Owner Occupied,2016,162271,2287.0,0.452764
4,16000US4805000,"Austin, TX",0,Owner Occupied,2017,163486,2068.0,0.452548


In [14]:
austin_rent_own_df_cleaned.tail(5)

Unnamed: 0,Place ID,Place,Occupied By ID,Occupied By,Year,Household Ownership,Household Ownership Moe,share
6,16000US4805000,"Austin, TX",0,Owner Occupied,2019,171685,2109.0,0.451337
7,16000US4805000,"Austin, TX",0,Owner Occupied,2020,179897,2732.0,0.455113
8,16000US4805000,"Austin, TX",0,Owner Occupied,2021,180557,2403.0,0.446789
9,16000US4805000,"Austin, TX",0,Owner Occupied,2022,188029,2553.0,0.44443
10,16000US4805000,"Austin, TX",0,Owner Occupied,2023,195678,3249.0,0.444426


In [15]:
austin_rent_own_df_cleaned.describe()

Unnamed: 0,Occupied By ID,Year,Household Ownership,Household Ownership Moe,share
count,11.0,11.0,11.0,11.0,11.0
mean,0.0,2018.0,170241.363636,2337.181818,0.449639
std,0.0,3.316625,14248.862413,401.033619,0.003497
min,0.0,2013.0,152054.0,1840.0,0.444426
25%,0.0,2015.5,159826.5,2088.5,0.447461
50%,0.0,2018.0,167206.0,2287.0,0.450142
75%,0.0,2020.5,180227.0,2478.0,0.452202
max,0.0,2023.0,195678.0,3249.0,0.455113


In [16]:
# Save to CSV
austin_rent_own_df_cleaned.to_csv("austin_RentOwn_cleaned.csv", index=False)