In [1]:
# import our dependencies

%matplotlib inline
from collections import Counter
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import numpy as np

## PCA + Oversampling (Random Oversampling) + Logistic Regression (SFRs)

In [2]:
# bring in our dataframe

Hancock_Park_df = pd.read_csv('Hancock_Park_Cleaned.csv', index_col='PARCEL')
Hancock_Park_df.head()

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,BATHROOMS,FAMILYRM,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5505026001,1,0,354 N Plymouth Blvd,1,0,0,2054958,1583885,471073,0,...,5,0,0,0,0,0,0,0,0,0
5515028023,1,0,143 N Plymouth Blvd,0,0,0,915420,592174,323246,0,...,2,0,0,0,0,0,0,0,0,0
5515022015,1,0,253 S Plymouth Blvd,0,0,0,1304706,731909,572797,0,...,3,0,0,0,0,0,0,0,0,1
5513010003,1,0,164 N Las Palmas Ave,0,0,0,2518796,1609956,908840,0,...,6,0,0,0,0,0,0,0,0,0
5515012030,1,0,108 S Rossmore Ave,1,0,0,1428176,999727,428449,0,...,7,0,0,0,0,0,0,0,0,0


In [3]:
# drop any nulls

Hancock_Park_df = Hancock_Park_df.dropna()

In [4]:
len(Hancock_Park_df)

5530

In [5]:
# Change the datatype of these two columns into ints

Hancock_Park_df['TAXAMT'] = Hancock_Park_df['TAXAMT'].astype(int)
Hancock_Park_df['EFFYRBLT'] = Hancock_Park_df['EFFYRBLT'].astype(int)

In [6]:
# Make sure our dataframe is only SFR's so exclude every property that has an '#' in its site address

Hancock_Park_df = Hancock_Park_df[~Hancock_Park_df['SITEADDRESS'].str.contains('#')]
Hancock_Park_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,BATHROOMS,FAMILYRM,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5505026001,1,0,354 N Plymouth Blvd,1,0,0,2054958,1583885,471073,0,...,5,0,0,0,0,0,0,0,0,0
5515028023,1,0,143 N Plymouth Blvd,0,0,0,915420,592174,323246,0,...,2,0,0,0,0,0,0,0,0,0
5515022015,1,0,253 S Plymouth Blvd,0,0,0,1304706,731909,572797,0,...,3,0,0,0,0,0,0,0,0,1


In [7]:
len(Hancock_Park_df)

5525

In [8]:
# Now drop the SiteAddress column
Hancock_Park_df.drop(['SITEADDRESS'], axis=1, inplace=True)

In [9]:
Hancock_Park_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,EXEMPTCD,...,BATHROOMS,FAMILYRM,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5505026001,1,0,1,0,0,2054958,1583885,471073,0,0,...,5,0,0,0,0,0,0,0,0,0
5515028023,1,0,0,0,0,915420,592174,323246,0,0,...,2,0,0,0,0,0,0,0,0,0
5515022015,1,0,0,0,0,1304706,731909,572797,0,0,...,3,0,0,0,0,0,0,0,0,1


In [10]:
# Step 1: Create our X and y

y = Hancock_Park_df['Did it sell?']
X = Hancock_Park_df.drop(['Did it sell?'], 1)

In [11]:
APN = Hancock_Park_df.index.values
APN

array([5505026001, 5515028023, 5515022015, ..., 5507014003, 5507019023,
       5524038012])

In [12]:
# Step 2: Use PCA to reduce dimension to three principal components.
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

In [13]:
X_pca

array([[ 1.05678823e+06, -5.40749780e+05, -7.99150429e+05],
       [-4.37316125e+05, -8.79156495e+04,  8.48396223e+04],
       [ 1.99411886e+05, -1.17643013e+05,  1.46161978e+05],
       ...,
       [-8.90418298e+05,  3.19670308e+04,  2.45564591e+04],
       [-8.40390671e+05,  3.38917753e+01,  2.46831758e+04],
       [-6.13625000e+05, -5.46815384e+03,  5.63046539e+04]])

In [14]:
# Step 3: Train, test, split

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, random_state=78, stratify=y)

In [15]:
# Step 4: Scale our data

# Create the StandardScaler instance
scaler = StandardScaler()

# Fit our scaler, named 'scaler' to our data, which produces a new StandardScaler object
# which we call 'X_scaler'
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [16]:
X_train_scaled_APN = []

In [17]:
# implement random oversampling
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=1)
X_random_oversampled, y_random_oversampled = ros.fit_resample(X_train_scaled, y_train)

Counter(y_random_oversampled)

Counter({0: 3720, 1: 3720})

In [18]:
# Logistic regression using random oversampled data
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver='lbfgs', 
                                max_iter=300,
                                random_state=78,
                                class_weight="balanced")

model.fit(X_random_oversampled, y_random_oversampled)

LogisticRegression(class_weight='balanced', max_iter=300, random_state=78)

In [19]:
# Evaluate the model
y_pred = model.predict(X_test_scaled)

In [20]:
print(f" Logistic regression model accuracy: {accuracy_score(y_test, y_pred):.3f}")

 Logistic regression model accuracy: 0.687


In [21]:
# just for shits n' gigs, lets find the balanced accuracy score

from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred)

0.5587749527091512

In [22]:
from sklearn.metrics import f1_score
f1_score(y_test, y_pred, average="weighted")

0.7442384369557266

In [23]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.91      0.72      0.40      0.81      0.53      0.30      1241
          1       0.14      0.40      0.72      0.21      0.53      0.28       141

avg / total       0.83      0.69      0.43      0.74      0.53      0.29      1382



In [24]:
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test, "PARCEL": y_test.index.values}).reset_index(drop=True)
results.sample(20)

Unnamed: 0,Prediction,Actual,PARCEL
679,1,1,5523028037
1109,0,0,5081031009
1018,1,0,5523035024
612,0,0,5090002005
275,0,0,5092002010
2,0,0,5523030037
376,0,0,5082007018
920,1,0,5084006014
1172,0,1,5507007020
89,0,0,5082022024


In [25]:
Hancock_Park_Results_SFRs_df = Hancock_Park_df.merge(results, how="left", right_on="PARCEL", left_on="PARCEL")
Hancock_Park_Results_SFRs_df.head()

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?,Prediction,Actual
0,5505026001,1,0,1,0,0,2054958,1583885,471073,0,...,0,0,0,0,0,0,0,0,,
1,5515028023,1,0,0,0,0,915420,592174,323246,0,...,0,0,0,0,0,0,0,0,0.0,0.0
2,5515022015,1,0,0,0,0,1304706,731909,572797,0,...,0,0,0,0,0,0,0,1,,
3,5513010003,1,0,0,0,0,2518796,1609956,908840,0,...,0,0,0,0,0,0,0,0,,
4,5515012030,1,0,1,0,0,1428176,999727,428449,0,...,0,0,0,0,0,0,0,0,,


In [26]:
Hancock_Park_Results_SFRs_df

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?,Prediction,Actual
0,5505026001,1,0,1,0,0,2054958,1583885,471073,0,...,0,0,0,0,0,0,0,0,,
1,5515028023,1,0,0,0,0,915420,592174,323246,0,...,0,0,0,0,0,0,0,0,0.0,0.0
2,5515022015,1,0,0,0,0,1304706,731909,572797,0,...,0,0,0,0,0,0,0,1,,
3,5513010003,1,0,0,0,0,2518796,1609956,908840,0,...,0,0,0,0,0,0,0,0,,
4,5515012030,1,0,1,0,0,1428176,999727,428449,0,...,0,0,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5520,5513014003,0,0,0,0,0,688635,550912,137723,0,...,0,0,0,0,0,0,0,0,0.0,0.0
5521,5084006021,0,0,0,0,0,589308,280085,309223,0,...,0,0,0,0,0,0,0,0,,
5522,5507014003,0,0,1,0,0,609094,369478,239616,0,...,0,0,0,0,0,0,0,1,,
5523,5507019023,0,0,0,0,0,638147,459267,178880,0,...,0,0,0,0,0,0,0,0,,


In [27]:
Hancock_Park_Results_SFRs_df.to_csv('Hancock_Park_Results_SFRs.csv')

# PCA + Oversampling (Random Oversampling) + Logistic Regression (Condos)

In [28]:
# bring in our dataframe

Hancock_Park_df = pd.read_csv('Hancock_Park_Cleaned.csv', index_col='PARCEL')
Hancock_Park_df.head()

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,BATHROOMS,FAMILYRM,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5505026001,1,0,354 N Plymouth Blvd,1,0,0,2054958,1583885,471073,0,...,5,0,0,0,0,0,0,0,0,0
5515028023,1,0,143 N Plymouth Blvd,0,0,0,915420,592174,323246,0,...,2,0,0,0,0,0,0,0,0,0
5515022015,1,0,253 S Plymouth Blvd,0,0,0,1304706,731909,572797,0,...,3,0,0,0,0,0,0,0,0,1
5513010003,1,0,164 N Las Palmas Ave,0,0,0,2518796,1609956,908840,0,...,6,0,0,0,0,0,0,0,0,0
5515012030,1,0,108 S Rossmore Ave,1,0,0,1428176,999727,428449,0,...,7,0,0,0,0,0,0,0,0,0


In [29]:
# drop any nulls

Hancock_Park_df = Hancock_Park_df.dropna()

In [30]:
len(Hancock_Park_df)

5530

In [31]:
# Change the datatype of these two columns into ints

Hancock_Park_df['TAXAMT'] = Hancock_Park_df['TAXAMT'].astype(int)
Hancock_Park_df['EFFYRBLT'] = Hancock_Park_df['EFFYRBLT'].astype(int)

In [32]:
# Make sure our dataframe is only Condos

Hancock_Park_df = Hancock_Park_df[Hancock_Park_df['SITEADDRESS'].str.contains('#')]
Hancock_Park_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,BATHROOMS,FAMILYRM,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5505008010,1,0,546 S Rimpau Blvd #2200,0,0,0,3491436,2450134,1041302,0,...,6,0,0,0,0,0,0,0,0,0
5515029015,1,0,169 S Plymouth Blvd #100,0,0,0,3122716,2402093,720623,0,...,4,0,0,0,0,0,0,0,0,0
5522008053,0,0,5114 Melrose Ave #8,0,0,0,790127,548843,241284,0,...,3,0,0,0,0,0,0,0,0,1


In [33]:
len(Hancock_Park_df)

5

In [39]:
# Now drop the SiteAddress column
Hancock_Park_df.drop(['SITEADDRESS'], axis=1, inplace=True)

In [40]:
Hancock_Park_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,EXEMPTCD,...,BATHROOMS,FAMILYRM,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5505008010,1,0,0,0,0,3491436,2450134,1041302,0,0,...,6,0,0,0,0,0,0,0,0,0
5515029015,1,0,0,0,0,3122716,2402093,720623,0,0,...,4,0,0,0,0,0,0,0,0,0
5522008053,0,0,0,0,0,790127,548843,241284,0,0,...,3,0,0,0,0,0,0,0,0,1


In [41]:
# Step 1: Create our X and y

y = Hancock_Park_df['Did it sell?']
X = Hancock_Park_df.drop(['Did it sell?'], 1)

In [42]:
APN = Hancock_Park_df.index.values
APN

array([5505008010, 5515029015, 5522008053, 5522008048, 5516014005])

In [43]:
# Step 2: Use PCA to reduce dimension to three principal components.
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

In [44]:
X_pca

array([[ 2685213.96624914,   487711.4316359 ,   -18072.9730726 ],
       [ 2436634.88899754,  -457406.79477098,    50496.47587626],
       [-1276644.47457764,  -114043.18418837,   -65881.3034359 ],
       [-1315693.5260042 ,  -106153.72398376,   -60953.82359048],
       [-2529510.85466484,   189892.27130722,    94411.62422273]])

In [45]:
# Step 3: Train, test, split

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, random_state=78, stratify=y)

In [46]:
# Step 4: Scale our data

# Create the StandardScaler instance
scaler = StandardScaler()

# Fit our scaler, named 'scaler' to our data, which produces a new StandardScaler object
# which we call 'X_scaler'
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [47]:
X_train_scaled_APN = []

In [48]:
# implement random oversampling
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=1)
X_random_oversampled, y_random_oversampled = ros.fit_resample(X_train_scaled, y_train)

Counter(y_random_oversampled)

Counter({1: 2, 0: 2})

In [49]:
# Logistic regression using random oversampled data
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver='lbfgs', 
                                max_iter=300,
                                random_state=78,
                                class_weight="balanced")

model.fit(X_random_oversampled, y_random_oversampled)

LogisticRegression(class_weight='balanced', max_iter=300, random_state=78)

In [50]:
# Evaluate the model
y_pred = model.predict(X_test_scaled)

In [51]:
print(f" Logistic regression model accuracy: {accuracy_score(y_test, y_pred):.3f}")

 Logistic regression model accuracy: 0.500


In [52]:
# just for shits n' gigs, lets find the balanced accuracy score

from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred)

0.5

In [53]:
from sklearn.metrics import f1_score
f1_score(y_test, y_pred, average="weighted")

0.3333333333333333

In [54]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.00      0.00      1.00      0.00      0.00      0.00         1
          1       0.50      1.00      0.00      0.67      0.00      0.00         1

avg / total       0.25      0.50      0.50      0.33      0.00      0.00         2



  _warn_prf(average, modifier, msg_start, len(result))


In [56]:
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test, "PARCEL": y_test.index.values}).reset_index(drop=True)
results

Unnamed: 0,Prediction,Actual,PARCEL
0,1,1,5522008048
1,1,0,5515029015


In [57]:
Hancock_Park_Results_Condos_df = Hancock_Park_df.merge(results, how="left", right_on="PARCEL", left_on="PARCEL")
Hancock_Park_Results_Condos_df.head()

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?,Prediction,Actual
0,5505008010,1,0,0,0,0,3491436,2450134,1041302,0,...,0,0,0,0,0,0,0,0,,
1,5515029015,1,0,0,0,0,3122716,2402093,720623,0,...,0,0,0,0,0,0,0,0,1.0,0.0
2,5522008053,0,0,0,0,0,790127,548843,241284,0,...,0,0,0,0,0,0,0,1,,
3,5522008048,0,0,0,0,0,767344,533310,234034,0,...,0,0,0,0,0,0,0,1,1.0,1.0
4,5516014005,1,0,0,0,0,83813,43702,40111,0,...,0,0,0,0,0,0,0,0,,


In [58]:
Hancock_Park_Results_Condos_df

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,DININGRM,POOL,PATIO,FIREPLCE,AIRMTHOD,HEATMTHD,VIEW,Did it sell?,Prediction,Actual
0,5505008010,1,0,0,0,0,3491436,2450134,1041302,0,...,0,0,0,0,0,0,0,0,,
1,5515029015,1,0,0,0,0,3122716,2402093,720623,0,...,0,0,0,0,0,0,0,0,1.0,0.0
2,5522008053,0,0,0,0,0,790127,548843,241284,0,...,0,0,0,0,0,0,0,1,,
3,5522008048,0,0,0,0,0,767344,533310,234034,0,...,0,0,0,0,0,0,0,1,1.0,1.0
4,5516014005,1,0,0,0,0,83813,43702,40111,0,...,0,0,0,0,0,0,0,0,,


In [59]:
Hancock_Park_Results_Condos_df.to_csv('Hancock_Park_Results_Condos.csv')