In [1]:
# import our dependencies

%matplotlib inline
from collections import Counter
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import numpy as np

## PCA + Oversampling (Random Oversampling) + Logistic Regression (SFRs)

In [2]:
# bring in our dataframe

Hollywood_df = pd.read_csv('Hollywood_Cleaned.csv', index_col='PARCEL')
Hollywood_df.head()

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5524005031,0,0,815 N Las Palmas Ave #101,1,0,0,189433,47704,141729,0,...,0,0,0,0,0,0,1,0,0,0
5537019032,0,0,1062 N Kingsley Dr,1,0,1,113490,112082,1408,0,...,1,0,0,0,0,0,0,0,0,0
5535011014,0,0,835 N Western Ave,1,0,0,269134,235503,33631,0,...,0,0,0,0,0,0,0,0,0,0
5537009909,0,0,1107 N Kingsley Dr,1,0,0,67146,67146,0,0,...,0,0,0,0,0,0,0,0,0,0
5534006007,0,0,6219 Banner Ave,0,0,0,421839,186956,234883,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# drop any nulls

Hollywood_df = Hollywood_df.dropna()

In [4]:
len(Hollywood_df)

3470

In [5]:
# Change the datatype of these two columns into ints

Hollywood_df['TAXAMT'] = Hollywood_df['TAXAMT'].astype(int)
Hollywood_df['EFFYRBLT'] = Hollywood_df['EFFYRBLT'].astype(int)

In [6]:
# Make sure our dataframe is only SFR's so exclude every property that has an '#' in its site address

Hollywood_df = Hollywood_df[~Hollywood_df['SITEADDRESS'].str.contains('#')]
Hollywood_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5537019032,0,0,1062 N Kingsley Dr,1,0,1,113490,112082,1408,0,...,1,0,0,0,0,0,0,0,0,0
5535011014,0,0,835 N Western Ave,1,0,0,269134,235503,33631,0,...,0,0,0,0,0,0,0,0,0,0
5537009909,0,0,1107 N Kingsley Dr,1,0,0,67146,67146,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
len(Hollywood_df)

2068

In [8]:
# Now drop the SiteAddress column
Hollywood_df.drop(['SITEADDRESS'], axis=1, inplace=True)

In [9]:
Hollywood_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,EXEMPTCD,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5537019032,0,0,1,0,1,113490,112082,1408,0,0,...,1,0,0,0,0,0,0,0,0,0
5535011014,0,0,1,0,0,269134,235503,33631,0,0,...,0,0,0,0,0,0,0,0,0,0
5537009909,0,0,1,0,0,67146,67146,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Step 1: Create our X and y

y = Hollywood_df['Did it sell?']
X = Hollywood_df.drop(['Did it sell?'], 1)

In [11]:
APN = Hollywood_df.index.values
APN

array([5537019032, 5535011014, 5537009909, ..., 5533002083, 5534017019,
       5550019006])

In [12]:
# Step 2: Use PCA to reduce dimension to three principal components.
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

In [13]:
X_pca

array([[-555143.51062687,   93214.34534781,  396355.15522728],
       [-393313.13213264,  -37677.19686051,  246378.1295548 ],
       [-566864.5461325 ,  105734.21685615,  456274.71260102],
       ...,
       [-404925.04889882,  -75115.40820629, -402390.97295949],
       [  69785.28229203, -406120.04579267, -135955.35954028],
       [8016695.09182898, 1176187.92618088,  125682.91168012]])

In [14]:
# Step 3: Train, test, split

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, random_state=78, stratify=y)

In [15]:
# Step 4: Scale our data

# Create the StandardScaler instance
scaler = StandardScaler()

# Fit our scaler, named 'scaler' to our data, which produces a new StandardScaler object
# which we call 'X_scaler'
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [16]:
X_train_scaled_APN = []

In [17]:
# implement random oversampling
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=1)
X_random_oversampled, y_random_oversampled = ros.fit_resample(X_train_scaled, y_train)

Counter(y_random_oversampled)

Counter({0: 1325, 1: 1325})

In [18]:
# Logistic regression using random oversampled data
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver='lbfgs', 
                                max_iter=300,
                                random_state=78,
                                class_weight="balanced")

model.fit(X_random_oversampled, y_random_oversampled)

LogisticRegression(class_weight='balanced', max_iter=300, random_state=78)

In [19]:
# Evaluate the model
y_pred = model.predict(X_test_scaled)

In [20]:
print(f" Logistic regression model accuracy: {accuracy_score(y_test, y_pred):.3f}")

 Logistic regression model accuracy: 0.600


In [21]:
# just for shits n' gigs, lets find the balanced accuracy score

from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred)

0.4930480964315551

In [22]:
from sklearn.metrics import f1_score
f1_score(y_test, y_pred, average="weighted")

0.6546789358594438

In [23]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.85      0.64      0.34      0.73      0.47      0.23       441
          1       0.14      0.34      0.64      0.20      0.47      0.21        76

avg / total       0.75      0.60      0.39      0.65      0.47      0.23       517



In [24]:
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test, "PARCEL": y_test.index.values}).reset_index(drop=True)
results.sample(20)

Unnamed: 0,Prediction,Actual,PARCEL
381,0,0,5550006019
170,0,0,5533024040
66,0,0,5536015016
140,0,0,5544027907
14,0,0,5538003003
462,0,0,5547030022
88,1,0,5547026046
207,0,0,5543013004
458,1,0,5533035018
178,1,0,5586029011


In [25]:
Hollywood_Results_SFRs_df = Hollywood_df.merge(results, how="left", right_on="PARCEL", left_on="PARCEL")
Hollywood_Results_SFRs_df.head()

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY,Prediction,Actual
0,5537019032,0,0,1,0,1,113490,112082,1408,0,...,0,0,0,0,0,0,0,0,0.0,0.0
1,5535011014,0,0,1,0,0,269134,235503,33631,0,...,0,0,0,0,0,0,0,0,0.0,0.0
2,5537009909,0,0,1,0,0,67146,67146,0,0,...,0,0,0,0,0,0,0,0,,
3,5534006007,0,0,0,0,0,421839,186956,234883,0,...,0,0,0,0,0,0,0,0,,
4,5536009007,0,0,1,0,0,299736,239794,59942,0,...,0,0,0,0,0,1,0,0,,


In [26]:
Hollywood_Results_SFRs_df

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY,Prediction,Actual
0,5537019032,0,0,1,0,1,113490,112082,1408,0,...,0,0,0,0,0,0,0,0,0.0,0.0
1,5535011014,0,0,1,0,0,269134,235503,33631,0,...,0,0,0,0,0,0,0,0,0.0,0.0
2,5537009909,0,0,1,0,0,67146,67146,0,0,...,0,0,0,0,0,0,0,0,,
3,5534006007,0,0,0,0,0,421839,186956,234883,0,...,0,0,0,0,0,0,0,0,,
4,5536009007,0,0,1,0,0,299736,239794,59942,0,...,0,0,0,0,0,1,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2063,5533024040,0,0,0,0,0,383148,383148,0,0,...,0,0,0,0,0,0,0,0,0.0,0.0
2064,5533024031,0,0,1,0,1,383148,383148,0,0,...,0,0,0,0,0,0,0,0,,
2065,5533002083,0,0,0,0,0,786818,533006,253812,0,...,0,0,0,0,0,0,0,0,,
2066,5534017019,0,1,1,0,0,650146,650146,0,0,...,0,0,0,0,0,0,0,0,,


In [27]:
Hollywood_Results_SFRs_df.to_csv('Hollywood_Results_SFRs.csv')

# PCA + Oversampling (Random Oversampling) + Logistic Regression (Condos)

In [28]:
# bring in our dataframe

Hollywood_df = pd.read_csv('Hollywood_Cleaned.csv', index_col='PARCEL')
Hollywood_df.head()

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5524005031,0,0,815 N Las Palmas Ave #101,1,0,0,189433,47704,141729,0,...,0,0,0,0,0,0,1,0,0,0
5537019032,0,0,1062 N Kingsley Dr,1,0,1,113490,112082,1408,0,...,1,0,0,0,0,0,0,0,0,0
5535011014,0,0,835 N Western Ave,1,0,0,269134,235503,33631,0,...,0,0,0,0,0,0,0,0,0,0
5537009909,0,0,1107 N Kingsley Dr,1,0,0,67146,67146,0,0,...,0,0,0,0,0,0,0,0,0,0
5534006007,0,0,6219 Banner Ave,0,0,0,421839,186956,234883,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# drop any nulls

Hollywood_df = Hollywood_df.dropna()

In [31]:
len(Hollywood_df)

3470

In [32]:
# Change the datatype of these two columns into ints

Hollywood_df['TAXAMT'] = Hollywood_df['TAXAMT'].astype(int)
Hollywood_df['EFFYRBLT'] = Hollywood_df['EFFYRBLT'].astype(int)

In [33]:
# Make sure our dataframe is only Condos

Hollywood_df = Hollywood_df[Hollywood_df['SITEADDRESS'].str.contains('#')]
Hollywood_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5524005031,0,0,815 N Las Palmas Ave #101,1,0,0,189433,47704,141729,0,...,0,0,0,0,0,0,1,0,0,0
5550007039,0,0,7600 Hollywood Blvd #301,0,0,1,211875,62367,149508,0,...,0,0,0,0,0,0,0,1,0,0
5548011042,0,0,1534 N Formosa Ave #2,1,0,1,391498,240284,151214,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
len(Hollywood_df)

1402

In [35]:
# Now drop the SiteAddress column
Hollywood_df.drop(['SITEADDRESS'], axis=1, inplace=True)

In [36]:
Hollywood_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,EXEMPTCD,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5524005031,0,0,1,0,0,189433,47704,141729,0,0,...,0,0,0,0,0,0,1,0,0,0
5550007039,0,0,0,0,1,211875,62367,149508,0,0,...,0,0,0,0,0,0,0,1,0,0
5548011042,0,0,1,0,1,391498,240284,151214,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
# Step 1: Create our X and y

y = Hollywood_df['Did it sell?']
X = Hollywood_df.drop(['Did it sell?'], 1)

In [38]:
APN = Hollywood_df.index.values
APN

array([5524005031, 5550007039, 5548011042, ..., 5537006007, 5537006008,
       5537006009])

In [39]:
# Step 2: Use PCA to reduce dimension to three principal components.
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

In [40]:
X_pca

array([[-5.28595644e+05,  5.06460483e+02, -1.05495214e+05],
       [-5.64580822e+05,  4.87284323e+04, -5.00437159e+04],
       [ 7.10513338e+04, -1.12052182e+05, -3.97568554e+05],
       ...,
       [-5.76411878e+05,  4.30915832e+04, -6.46774775e+04],
       [-1.05538323e+05,  6.17010290e+04, -1.83067142e+05],
       [-8.65427690e+04,  4.45802677e+04, -1.73785610e+05]])

In [41]:
# Step 3: Train, test, split

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, random_state=78, stratify=y)

In [42]:
# Step 4: Scale our data

# Create the StandardScaler instance
scaler = StandardScaler()

# Fit our scaler, named 'scaler' to our data, which produces a new StandardScaler object
# which we call 'X_scaler'
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [43]:
X_train_scaled_APN = []

In [44]:
# implement random oversampling
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=1)
X_random_oversampled, y_random_oversampled = ros.fit_resample(X_train_scaled, y_train)

Counter(y_random_oversampled)

Counter({0: 831, 1: 831})

In [45]:
# Logistic regression using random oversampled data
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver='lbfgs', 
                                max_iter=300,
                                random_state=78,
                                class_weight="balanced")

model.fit(X_random_oversampled, y_random_oversampled)

LogisticRegression(class_weight='balanced', max_iter=300, random_state=78)

In [46]:
# Evaluate the model
y_pred = model.predict(X_test_scaled)

In [47]:
print(f" Logistic regression model accuracy: {accuracy_score(y_test, y_pred):.3f}")

 Logistic regression model accuracy: 0.513


In [48]:
# just for shits n' gigs, lets find the balanced accuracy score

from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred)

0.5257711638908051

In [49]:
from sklearn.metrics import f1_score
f1_score(y_test, y_pred, average="weighted")

0.5580090236586204

In [50]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.81      0.50      0.55      0.62      0.53      0.27       278
          1       0.22      0.55      0.50      0.32      0.53      0.28        73

avg / total       0.69      0.51      0.54      0.56      0.53      0.28       351



In [51]:
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test, "PARCEL": y_test.index.values}).reset_index(drop=True)
results

Unnamed: 0,Prediction,Actual,PARCEL
0,1,0,5586029036
1,1,1,5545006048
2,0,1,5545007092
3,1,0,5550016048
4,0,1,5545006041
...,...,...,...
346,1,0,5550017073
347,1,0,5546030090
348,0,0,5533026041
349,1,0,5533026068


In [52]:
Hollywood_Results_Condos_df = Hollywood_df.merge(results, how="left", right_on="PARCEL", left_on="PARCEL")
Hollywood_Results_Condos_df.head()

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY,Prediction,Actual
0,5524005031,0,0,1,0,0,189433,47704,141729,0,...,0,0,0,0,1,0,0,0,,
1,5550007039,0,0,0,0,1,211875,62367,149508,0,...,0,0,0,0,0,1,0,0,,
2,5548011042,0,0,1,0,1,391498,240284,151214,0,...,0,0,0,0,0,0,0,0,,
3,5550016028,0,0,0,0,1,399327,99828,299499,0,...,1,0,0,0,0,0,0,0,,
4,5550016036,0,0,0,0,1,487013,219264,267749,0,...,0,0,0,0,0,0,0,0,,


In [53]:
Hollywood_Results_Condos_df

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,QUIT CLAIM,RE-RECORD,SHERIFFS,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY,Prediction,Actual
0,5524005031,0,0,1,0,0,189433,47704,141729,0,...,0,0,0,0,1,0,0,0,,
1,5550007039,0,0,0,0,1,211875,62367,149508,0,...,0,0,0,0,0,1,0,0,,
2,5548011042,0,0,1,0,1,391498,240284,151214,0,...,0,0,0,0,0,0,0,0,,
3,5550016028,0,0,0,0,1,399327,99828,299499,0,...,1,0,0,0,0,0,0,0,,
4,5550016036,0,0,0,0,1,487013,219264,267749,0,...,0,0,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397,5550024036,0,0,1,0,1,489376,240650,248726,0,...,0,0,0,0,0,0,0,0,,
1398,5550024041,0,0,0,0,1,373377,228031,145346,0,...,0,0,0,0,0,0,0,0,,
1399,5537006007,0,0,1,0,0,193843,22065,171778,0,...,0,0,0,0,0,0,0,0,,
1400,5537006008,0,0,0,0,1,449374,271845,177529,0,...,0,0,0,0,0,0,0,0,,


In [55]:
Hollywood_Results_Condos_df.to_csv('Hollywood_Results_Condos.csv')