In [1]:
# import our dependencies

%matplotlib inline
from collections import Counter
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import numpy as np
import os

## PCA + Oversampling (Random Oversampling) + Logistic Regression (SFRs)

In [2]:
file = 'West_Hollywood_Cleaned.csv'

location = '/Users/Admin/Desktop/GW_Bootcamp/Analysis_Projects/Final_Project_Team_1/'

In [3]:
path = os.path.join(location, file)

In [4]:
# bring in our dataframe

West_Hollywood_df = pd.read_csv(path, index_col='PARCEL')
West_Hollywood_df.head()

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5529008006,0,1,8323 Willoughby Ave,0,0,0,83200,72198,0,0,...,0,0,0,0,0,0,0,0,0,0
4340015017,0,0,8998 Lloyd Pl,0,0,0,359300,203245,0,0,...,0,0,0,0,0,0,0,0,0,0
5531004052,0,0,1207 Greenacre Ave,1,0,0,318800,199313,0,0,...,0,0,0,0,0,0,0,0,0,0
4336004003,0,0,8756 Dorrington Ave,0,0,0,453200,357404,0,0,...,0,0,0,0,0,0,0,0,0,0
5529017005,1,0,930 N La Jolla Ave,0,0,0,538600,389916,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# drop any nulls

West_Hollywood_df = West_Hollywood_df.dropna()

In [27]:
len(West_Hollywood_df)

6380

In [28]:
# Change the datatype of these two columns into ints

West_Hollywood_df['TAXAMT'] = West_Hollywood_df['TAXAMT'].astype(int)
West_Hollywood_df['EFFYRBLT'] = West_Hollywood_df['EFFYRBLT'].astype(int)

In [29]:
# Make sure our dataframe is only SFR's so exclude every property that has an '#' in its site address

West_Hollywood_df = West_Hollywood_df[~West_Hollywood_df['SITEADDRESS'].str.contains('#')]
West_Hollywood_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5529008006,0,1,8323 Willoughby Ave,0,0,0,83200,72198,0,0,...,0,0,0,0,0,0,0,0,0,0
4340015017,0,0,8998 Lloyd Pl,0,0,0,359300,203245,0,0,...,0,0,0,0,0,0,0,0,0,0
5531004052,0,0,1207 Greenacre Ave,1,0,0,318800,199313,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
len(West_Hollywood_df)

1128

In [32]:
# Now drop the SiteAddress column
West_Hollywood_df.drop(['SITEADDRESS'], axis=1, inplace=True)

In [33]:
West_Hollywood_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,EXEMPTCD,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5529008006,0,1,0,0,0,83200,72198,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4340015017,0,0,0,0,0,359300,203245,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5531004052,0,0,1,0,0,318800,199313,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
# Step 1: Create our X and y

y = West_Hollywood_df['Did it sell?']
X = West_Hollywood_df.drop(['Did it sell?'], 1)

In [35]:
APN = West_Hollywood_df.index.values
APN

array([5529008006, 4340015017, 5531004052, ..., 5554012043, 5554012038,
       5554012051])

In [36]:
# Step 2: Use PCA to reduce dimension to three principal components.
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

In [37]:
X_pca

array([[-883763.02136271, -389713.43939826,  -34934.41775827],
       [-591340.97835579, -300900.34265914,  -32938.27897073],
       [-631976.56758082, -298811.4909254 ,  -32887.79353162],
       ...,
       [-485606.22543633, -165344.47916255,    3781.7912153 ],
       [-684050.11180634, -334306.84490454,    9241.36744772],
       [-276825.95640811,  -32497.1643672 ,  316564.93125094]])

In [38]:
# Step 3: Train, test, split

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, random_state=78, stratify=y)

In [39]:
# Step 4: Scale our data

# Create the StandardScaler instance
scaler = StandardScaler()

# Fit our scaler, named 'scaler' to our data, which produces a new StandardScaler object
# which we call 'X_scaler'
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [40]:
X_train_scaled_APN = []

In [41]:
# implement random oversampling
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=1)
X_random_oversampled, y_random_oversampled = ros.fit_resample(X_train_scaled, y_train)

Counter(y_random_oversampled)

Counter({0: 727, 1: 727})

In [45]:
# Logistic regression using random oversampled data
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver='lbfgs', 
                                max_iter=300,
                                random_state=78,
                                class_weight="balanced")

model.fit(X_random_oversampled, y_random_oversampled)

LogisticRegression(class_weight='balanced', max_iter=300, random_state=78)

In [46]:
# Evaluate the model
y_pred = model.predict(X_test_scaled)

In [47]:
print(f" Logistic regression model accuracy: {accuracy_score(y_test, y_pred):.3f}")

 Logistic regression model accuracy: 0.521


In [48]:
# just for shits n' gigs, lets find the balanced accuracy score

from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred)

0.5228305785123968

In [50]:
from sklearn.metrics import f1_score
f1_score(y_test, y_pred, average="weighted")

0.5924571658601631

In [51]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.87      0.52      0.53      0.65      0.52      0.27       242
          1       0.15      0.53      0.52      0.24      0.52      0.27        40

avg / total       0.77      0.52      0.52      0.59      0.52      0.27       282



In [52]:
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test, "PARCEL": y_test.index.values}).reset_index(drop=True)
results.sample(20)

Unnamed: 0,Prediction,Actual,PARCEL
193,0,0,4336015004
127,1,0,4336003009
124,1,0,4340014015
53,1,1,5531005009
66,1,1,4339009008
255,1,0,4337016019
264,0,0,4337016012
170,0,0,5531005032
232,1,0,4336004021
278,1,0,4336004028


In [53]:
West_Hollywood_Results_SFRs_df = West_Hollywood_df.merge(results, how="left", right_on="PARCEL", left_on="PARCEL")
West_Hollywood_Results_SFRs_df.head()

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY,Prediction,Actual
0,5529008006,0,1,0,0,0,83200,72198,0,0,...,0,0,0,0,0,0,0,0,,
1,4340015017,0,0,0,0,0,359300,203245,0,0,...,0,0,0,0,0,0,0,0,,
2,5531004052,0,0,1,0,0,318800,199313,0,0,...,0,0,0,0,0,0,0,0,,
3,4336004003,0,0,0,0,0,453200,357404,0,0,...,0,0,0,0,0,0,0,0,0.0,0.0
4,5529017005,1,0,0,0,0,538600,389916,0,0,...,0,0,0,0,0,0,0,0,,


In [54]:
West_Hollywood_Results_SFRs_df

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY,Prediction,Actual
0,5529008006,0,1,0,0,0,83200,72198,0,0,...,0,0,0,0,0,0,0,0,,
1,4340015017,0,0,0,0,0,359300,203245,0,0,...,0,0,0,0,0,0,0,0,,
2,5531004052,0,0,1,0,0,318800,199313,0,0,...,0,0,0,0,0,0,0,0,,
3,4336004003,0,0,0,0,0,453200,357404,0,0,...,0,0,0,0,0,0,0,0,0.0,0.0
4,5529017005,1,0,0,0,0,538600,389916,0,0,...,0,0,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1123,5554012048,1,0,0,0,0,141299,64965,76334,0,...,0,0,0,0,0,0,0,0,,
1124,5554012049,0,1,1,0,0,615790,307383,308407,0,...,0,0,0,0,0,0,0,0,,
1125,5554012043,0,0,1,0,1,443569,354857,88712,0,...,0,0,0,0,0,1,0,0,,
1126,5554012038,0,0,0,0,1,272185,158899,113286,0,...,1,0,0,0,0,0,0,0,,


In [55]:
West_Hollywood_Results_SFRs_df.to_csv('West_Hollywood_Results_SFRs.csv')

# PCA + Oversampling (Random Oversampling) + Logistic Regression (Condos)

In [56]:
# bring in our dataframe

West_Hollywood_df = pd.read_csv(path, index_col='PARCEL')
West_Hollywood_df.head()

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5529008006,0,1,8323 Willoughby Ave,0,0,0,83200,72198,0,0,...,0,0,0,0,0,0,0,0,0,0
4340015017,0,0,8998 Lloyd Pl,0,0,0,359300,203245,0,0,...,0,0,0,0,0,0,0,0,0,0
5531004052,0,0,1207 Greenacre Ave,1,0,0,318800,199313,0,0,...,0,0,0,0,0,0,0,0,0,0
4336004003,0,0,8756 Dorrington Ave,0,0,0,453200,357404,0,0,...,0,0,0,0,0,0,0,0,0,0
5529017005,1,0,930 N La Jolla Ave,0,0,0,538600,389916,0,0,...,0,0,0,0,0,0,0,0,0,0


In [57]:
# drop any nulls

West_Hollywood_df = West_Hollywood_df.dropna()

In [58]:
len(West_Hollywood_df)

6380

In [59]:
# Change the datatype of these two columns into ints

West_Hollywood_df['TAXAMT'] = West_Hollywood_df['TAXAMT'].astype(int)
West_Hollywood_df['EFFYRBLT'] = West_Hollywood_df['EFFYRBLT'].astype(int)

In [60]:
# Make sure our dataframe is only Condos

West_Hollywood_df = West_Hollywood_df[West_Hollywood_df['SITEADDRESS'].str.contains('#')]
West_Hollywood_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,SITEADDRESS,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5554025186,0,0,1123 N Flores St #21,0,0,1,617170,408876,208294,0,...,0,0,0,0,0,0,0,0,0,0
5554020145,0,0,1200 N Sweetzer Ave #9,0,0,1,852802,533001,319801,0,...,0,0,0,0,0,0,0,0,0,0
5555004108,0,0,1137 Hacienda Pl #107,0,0,1,653187,418040,235147,0,...,0,0,0,0,0,0,0,0,0,0


In [61]:
len(West_Hollywood_df)

5252

In [62]:
# Now drop the SiteAddress column
West_Hollywood_df.drop(['SITEADDRESS'], axis=1, inplace=True)

In [63]:
West_Hollywood_df.head(3)

Unnamed: 0_level_0,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,EXEMPTCD,...,Other,PARTNERSHIP,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY
PARCEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5554025186,0,0,0,0,1,617170,408876,208294,0,0,...,0,0,0,0,0,0,0,0,0,0
5554020145,0,0,0,0,1,852802,533001,319801,0,1,...,0,0,0,0,0,0,0,0,0,0
5555004108,0,0,0,0,1,653187,418040,235147,0,0,...,0,0,0,0,0,0,0,0,0,0


In [64]:
# Step 1: Create our X and y

y = West_Hollywood_df['Did it sell?']
X = West_Hollywood_df.drop(['Did it sell?'], 1)

In [65]:
APN = West_Hollywood_df.index.values
APN

array([5554025186, 5554020145, 5555004108, ..., 5560023079, 5554008098,
       5554008097])

In [66]:
# Step 2: Use PCA to reduce dimension to three principal components.
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

In [67]:
X_pca

array([[ 324992.12868223,  -33627.16100334,  173726.38599696],
       [ 663124.69830341, -151616.6857415 ,  109308.06170406],
       [ 727427.64983627,  112762.84949708,  482646.97982208],
       ...,
       [ 199267.26054695,   25593.95483372,  160751.61574812],
       [-143691.14265002,  115350.04376565,   61719.15162262],
       [-340592.00870213,  197130.33101405,    5569.68117715]])

In [68]:
# Step 3: Train, test, split

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, random_state=78, stratify=y)

In [69]:
# Step 4: Scale our data

# Create the StandardScaler instance
scaler = StandardScaler()

# Fit our scaler, named 'scaler' to our data, which produces a new StandardScaler object
# which we call 'X_scaler'
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [70]:
X_train_scaled_APN = []

In [71]:
# implement random oversampling
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=1)
X_random_oversampled, y_random_oversampled = ros.fit_resample(X_train_scaled, y_train)

Counter(y_random_oversampled)

Counter({0: 3308, 1: 3308})

In [72]:
# Logistic regression using random oversampled data
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver='lbfgs', 
                                max_iter=300,
                                random_state=78,
                                class_weight="balanced")

model.fit(X_random_oversampled, y_random_oversampled)

LogisticRegression(class_weight='balanced', max_iter=300, random_state=78)

In [73]:
# Evaluate the model
y_pred = model.predict(X_test_scaled)

In [74]:
print(f" Logistic regression model accuracy: {accuracy_score(y_test, y_pred):.3f}")

 Logistic regression model accuracy: 0.605


In [75]:
# just for shits n' gigs, lets find the balanced accuracy score

from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred)

0.5396515995337392

In [76]:
from sklearn.metrics import f1_score
f1_score(y_test, y_pred, average="weighted")

0.6559119596623073

In [77]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.86      0.64      0.44      0.73      0.53      0.29      1103
          1       0.19      0.44      0.64      0.26      0.53      0.28       210

avg / total       0.75      0.61      0.47      0.66      0.53      0.29      1313



In [78]:
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test, "PARCEL": y_test.index.values}).reset_index(drop=True)
results.sample(20)

Unnamed: 0,Prediction,Actual,PARCEL
865,1,1,4340021065
494,0,1,4340027050
764,1,1,4337013065
169,1,0,4340027151
857,0,0,4339007053
47,1,0,4337005104
607,0,0,4339019067
828,0,0,5555006032
752,0,1,4339015169
1090,1,0,5559006134


In [79]:
West_Hollywood_Results_Condos_df = West_Hollywood_df.merge(results, how="left", right_on="PARCEL", left_on="PARCEL")
West_Hollywood_Results_Condos_df.head()

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY,Prediction,Actual
0,5554025186,0,0,0,0,1,617170,408876,208294,0,...,0,0,0,0,0,0,0,0,,
1,5554020145,0,0,0,0,1,852802,533001,319801,0,...,0,0,0,0,0,0,0,0,,
2,5555004108,0,0,0,0,1,653187,418040,235147,0,...,0,0,0,0,0,0,0,0,1.0,0.0
3,4340011022,0,0,1,0,1,1025000,542200,482800,0,...,0,0,0,0,0,0,0,0,,
4,4340027180,0,0,1,0,1,1023354,756033,267321,0,...,0,0,0,0,0,0,0,0,1.0,0.0


In [80]:
West_Hollywood_Results_Condos_df

Unnamed: 0,PARCEL,Owned by Trust?,Owned by Business?,MAIL DIFFERENT FROM SITE?,MAIL OUTSIDE CA?,TITLECO1,ASSDTOTAL,ASSDLAND,ASSDSTCT,ASSDOTHR,...,QUIT CLAIM,RE-RECORD,RECEIVERS DEED,SPECIAL WARRANTY,TAX DEED,TRUST TRANSFER,TRUSTEES,WARRANTY,Prediction,Actual
0,5554025186,0,0,0,0,1,617170,408876,208294,0,...,0,0,0,0,0,0,0,0,,
1,5554020145,0,0,0,0,1,852802,533001,319801,0,...,0,0,0,0,0,0,0,0,,
2,5555004108,0,0,0,0,1,653187,418040,235147,0,...,0,0,0,0,0,0,0,0,1.0,0.0
3,4340011022,0,0,1,0,1,1025000,542200,482800,0,...,0,0,0,0,0,0,0,0,,
4,4340027180,0,0,1,0,1,1023354,756033,267321,0,...,0,0,0,0,0,0,0,0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5247,5554009037,0,0,0,0,1,520901,413285,107616,0,...,0,0,0,0,0,0,0,0,,
5248,5554010042,0,0,0,0,1,365239,253854,110885,500,...,0,0,0,0,0,0,0,0,,
5249,5560023079,0,0,0,0,1,514068,411255,102813,0,...,0,0,0,0,0,0,0,0,,
5250,5554008098,0,0,0,0,1,325319,260257,65062,0,...,0,0,0,0,0,0,0,0,,


In [83]:
West_Hollywood_Results_Condos_df.to_csv('West_Hollywood_Results_Condos.csv')