In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import PolynomialFeatures, PowerTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler

import warnings
warnings.filterwarnings('ignore')

 ## Machine Learning regressions to predict housing price 

Columns
id - Notice identifier. It is not unique: if the notification is updated by the real estate agency (new version of the notification) a new record is created with the same id but different dates: registration and cancellation.

operation_type - Type of operation (these are all sales, can be removed).

l2 - Administrative level 2: usually province

l3 - Administrative level 3: usually city

lat - Latitude.

lon - Longitude.

price - Price published in the ad.

property_type - Type of property (House, Apartment, PH).

rooms - Number of rooms (useful in Argentina).

bathrooms - Number of bathrooms.

start_date - Date when the ad was created.

end_date - Date of termination of the advertisement.

created_on - Date when the first version of the notice was created.

surface_total - Total area in m².

surface_covered - Covered area in m².

title - Title of the advertisement.

description - Description of the advertisement.

ad_type - Type of ad (Property, Development/Project).

Acknowledgements
The data in this dataset was collected by Properati.

In [2]:
df = pd.read_csv('ar_properties.csv')
df

Unnamed: 0,id,ad_type,start_date,end_date,created_on,lat,lon,l1,l2,l3,...,bathrooms,surface_total,surface_covered,price,currency,price_period,title,description,property_type,operation_type
0,wdQ5hWhv8P14T7Sh9g4QCg==,Propiedad,2020-12-25,9999-12-31,2020-12-25,-32.716652,-68.642692,Argentina,Mendoza,,...,,350.0,350.0,,,,Excelentes Lotes Sobre Ruta 34,Corredor Responsable: VICTOR E. MONTIVERO - C....,Lote,Venta
1,nnMBYZ4RMRY+vm753EtA+g==,Propiedad,2020-12-25,9999-12-31,2020-12-25,-24.797723,-65.467514,Argentina,Salta,,...,,1541.0,1541.0,,,Mensual,TERRENO + VENTA + JARDINES DE SAN LORENZO +150...,Corredor Responsable: Pablo Castañeda - C.U.C....,Lote,Venta
2,+dnVA1K6JxzL1zAjOEQ1pA==,Propiedad,2020-12-25,2020-12-29,2020-12-25,-34.919373,-58.020591,Argentina,Bs.As. G.B.A. Zona Sur,La Plata,...,,1000.0,1000.0,,,Mensual,Lote en Venta de 1000 m2 en La Plata,Corredor Responsable: Rico Sebastián - Martill...,Lote,Venta
3,dLHXKN5/sRZpm9Yk0yI2nA==,Propiedad,2020-12-25,2020-12-29,2020-12-25,-34.919455,-58.024807,Argentina,Bs.As. G.B.A. Zona Sur,La Plata,...,,1000.0,1000.0,,,Mensual,Lote en Venta de 1000 m2 en La Plata,Corredor Responsable: Rico Sebastián - Martill...,Lote,Venta
4,wtw/k887EPipd37UYHKb1Q==,Propiedad,2020-12-25,9999-12-31,2020-12-25,-34.364924,-58.783143,Argentina,Bs.As. G.B.A. Zona Norte,Escobar,...,,18164.0,18164.0,,,Mensual,PANAMERICANA 47300,Nave principal 66 x 90 m: 6005 m2 cubiertos...,Otro,Venta
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,SEeK1Z96Q3KuTKhNyREFDg==,Propiedad,2020-05-12,2020-09-02,2020-05-12,-34.900186,-56.176744,Uruguay,Montevideo,,...,2.0,58.0,55.0,23000.0,UYU,Mensual,Apartamento en ALQUILER 2 dormitorios - Cordón:,Apartamento en ALQUILER 2 dormitorios - Cordón...,Departamento,Alquiler
999996,9rprv1XJCeNLnJP9dgWy0A==,Propiedad,2020-05-12,2020-05-28,2020-05-12,-34.886879,-56.188048,Uruguay,Montevideo,,...,1.0,62.0,62.0,25000.0,UYU,Mensual,Apartamento alquiler a estrenar 2 dormitorios ...,Apartamento moderno a estrenar de 2 dormitorio...,Departamento,Alquiler
999997,ABzRp7AdRjDSFxKP1iNeYA==,Propiedad,2020-05-12,2020-08-11,2020-05-12,-34.891367,-56.098215,Uruguay,Montevideo,,...,2.0,,85.0,48000.0,UYU,Mensual,Casa - Malvín,"EN EL CORAZON DE MALVIN, CERCA DE TODO<br><br>...",Casa,Alquiler
999998,/dXMbrn3glBxu4WQBQu6IA==,Propiedad,2020-05-12,9999-12-31,2020-05-12,-34.462092,-57.842109,Uruguay,Colonia,,...,1.0,135.0,135.0,35000.0,UYU,Mensual,House - Colonia del Sacramento,"Casa de tres dormitorios en muy linda cuadra, ...",Casa,Alquiler


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 25 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   id               1000000 non-null  object 
 1   ad_type          1000000 non-null  object 
 2   start_date       1000000 non-null  object 
 3   end_date         1000000 non-null  object 
 4   created_on       1000000 non-null  object 
 5   lat              849189 non-null   float64
 6   lon              848255 non-null   float64
 7   l1               1000000 non-null  object 
 8   l2               1000000 non-null  object 
 9   l3               946673 non-null   object 
 10  l4               239496 non-null   object 
 11  l5               5001 non-null     object 
 12  l6               0 non-null        float64
 13  rooms            526423 non-null   float64
 14  bedrooms         466213 non-null   float64
 15  bathrooms        776733 non-null   float64
 16  surface_total    37

In [4]:
df.isna().sum()

id                       0
ad_type                  0
start_date               0
end_date                 0
created_on               0
lat                 150811
lon                 151745
l1                       0
l2                       0
l3                   53327
l4                  760504
l5                  994999
l6                 1000000
rooms               473577
bedrooms            533787
bathrooms           223267
surface_total       620499
surface_covered     618549
price                36902
currency             38281
price_period        636985
title                    0
description             18
property_type            0
operation_type           0
dtype: int64

Mengubah semua nilai price dalam USD

In [5]:
df.loc[df['currency'] == 'ARS', 'price']

28        2000000.0
29         780000.0
30        1650000.0
31          50000.0
32          35000.0
            ...    
997658    3200000.0
997659     170000.0
997660      75000.0
997661      25000.0
997662      56000.0
Name: price, Length: 202179, dtype: float64

In [6]:
for i in df['currency'].unique():
    if i == 'ARS':
        df.loc[df['currency'] == i, 'price'] =  df.loc[df['currency'] == i, 'price'] * 0.009
    elif i == 'PEN':
        df.loc[df['currency'] == i, 'price'] = df.loc[df['currency'] == i, 'price'] * 0.27
    elif i == 'UYU':
        df.loc[df['currency'] == i, 'price'] = df.loc[df['currency'] == i, 'price'] * 0.024
    else:
        df.loc[df['currency'] == i, 'price'] = df.loc[df['currency'] == i, 'price'] 

In [7]:
df.loc[df['currency'] == 'ARS', 'price']

28        18000.0
29         7020.0
30        14850.0
31          450.0
32          315.0
           ...   
997658    28800.0
997659     1530.0
997660      675.0
997661      225.0
997662      504.0
Name: price, Length: 202179, dtype: float64

## Feature Selection

Kolom Id dan ad_type akan di drop(ad_type di drop karena hanya memiliki 1 jenis value (tidak memberikan informasi))

In [8]:
df['ad_type'].unique() ## akan di drop karena hanya memiliki 1 jenis value (tidak memberikan informasi)

array(['Propiedad'], dtype=object)

In [9]:
df.drop(columns=['ad_type','id'], inplace = True)

start_date, end_date, dan created_on tidak memberikan informasi yang relevan terhadap harga rumah (bergantung kepada si pembuat ads). begitu juga dengan period_price(asumsi : bias dapat merupakan harga sewa iklan)

In [10]:
df.drop(columns =['start_date', 'end_date', 'created_on', 'price_period'], inplace = True)

In [11]:
missing_percentage = df.isnull().sum()*100/len(df.index)
missing_percentage

lat                 15.0811
lon                 15.1745
l1                   0.0000
l2                   0.0000
l3                   5.3327
l4                  76.0504
l5                  99.4999
l6                 100.0000
rooms               47.3577
bedrooms            53.3787
bathrooms           22.3267
surface_total       62.0499
surface_covered     61.8549
price                3.6902
currency             3.8281
title                0.0000
description          0.0018
property_type        0.0000
operation_type       0.0000
dtype: float64

l4, l5, l6 merupakan informasi yang spesifik namun memiliki missing value yang besar

In [12]:
df.drop(columns=['l4', 'l5', 'l6'], inplace = True)

kolom currency, title, dan description akan di drop. karena currency hanya sebagai data pendukung untuk mengubah price. sedangkan title dan description sangat kompleks untuk dilakukan analisis dengan unique value yang besar.

In [13]:
df.drop(columns = ['currency', 'title', 'description'], inplace = True)

In [14]:
df.isna().sum()

lat                150811
lon                151745
l1                      0
l2                      0
l3                  53327
rooms              473577
bedrooms           533787
bathrooms          223267
surface_total      620499
surface_covered    618549
price               36902
property_type           0
operation_type          0
dtype: int64

## Handling Missing Value

Mengisi Missing Value untuk feature yang belum terisi, dengan catatan:
- mean untuk data continous numerik
- median untuk data diskrit numerik
- modus untuk data string/object

In [15]:
data ={
    'lat' : df['lat'].mean(),
    'lon' : df['lon'].mean(),
    'l3' : df['l3'].mode()[0],
    'rooms' : df['rooms'].median(),
    'bedrooms' : df['bedrooms'].median(),
    'bathrooms' : df['bathrooms'].median(),
    'surface_total' : df['surface_total'].mean(),
    'surface_covered' : df['surface_covered'].mean()
}   
    

In [16]:
df.fillna(data, inplace = True)

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 13 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   lat              1000000 non-null  float64
 1   lon              1000000 non-null  float64
 2   l1               1000000 non-null  object 
 3   l2               1000000 non-null  object 
 4   l3               1000000 non-null  object 
 5   rooms            1000000 non-null  float64
 6   bedrooms         1000000 non-null  float64
 7   bathrooms        1000000 non-null  float64
 8   surface_total    1000000 non-null  float64
 9   surface_covered  1000000 non-null  float64
 10  price            963098 non-null   float64
 11  property_type    1000000 non-null  object 
 12  operation_type   1000000 non-null  object 
dtypes: float64(8), object(5)
memory usage: 99.2+ MB


Menghapus data yang kolom price nya memiliki missing value

In [18]:
df.dropna(inplace=True)

In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 963098 entries, 23 to 999999
Data columns (total 13 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   lat              963098 non-null  float64
 1   lon              963098 non-null  float64
 2   l1               963098 non-null  object 
 3   l2               963098 non-null  object 
 4   l3               963098 non-null  object 
 5   rooms            963098 non-null  float64
 6   bedrooms         963098 non-null  float64
 7   bathrooms        963098 non-null  float64
 8   surface_total    963098 non-null  float64
 9   surface_covered  963098 non-null  float64
 10  price            963098 non-null  float64
 11  property_type    963098 non-null  object 
 12  operation_type   963098 non-null  object 
dtypes: float64(8), object(5)
memory usage: 102.9+ MB


In [20]:
### Encoding

In [21]:
## Check Object Info
col = ['l1', 'l2', 'l3', 'property_type', 'operation_type']
listItem = []
for i in col:
    listItem.append([i, 
                    df[i].dtype,
                    df[i].nunique()])

dfDesc = pd.DataFrame(columns=['Column Name', 'Data Type', 'Number of Unique'],
                     data=listItem)
dfDesc.sort_values('Number of Unique', ascending =False)

Unnamed: 0,Column Name,Data Type,Number of Unique
2,l3,object,1209
1,l2,object,43
3,property_type,object,10
0,l1,object,4
4,operation_type,object,3


In [22]:
df_1 = df.copy()
df_1

Unnamed: 0,lat,lon,l1,l2,l3,rooms,bedrooms,bathrooms,surface_total,surface_covered,price,property_type,operation_type
23,-34.400188,-58.724054,Argentina,Bs.As. G.B.A. Zona Norte,Escobar,4.0,2.0,1.0,514.889481,98.0,0.0,Otro,Alquiler temporal
28,-24.825768,-65.470791,Argentina,Salta,Salta,3.0,2.0,1.0,324.000000,324.0,18000.0,Lote,Venta
29,-38.982898,-68.350754,Argentina,Neuquén,Confluencia,3.0,2.0,1.0,250.000000,250.0,7020.0,Lote,Venta
30,-38.948296,-68.265567,Argentina,Neuquén,Confluencia,3.0,2.0,1.0,360.000000,360.0,14850.0,Lote,Venta
31,-34.602164,-58.375030,Argentina,Capital Federal,San Nicolás,3.0,2.0,2.0,160.000000,160.0,450.0,Oficina,Alquiler
...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,-34.900186,-56.176744,Uruguay,Montevideo,Mar del Plata,3.0,2.0,2.0,58.000000,55.0,552.0,Departamento,Alquiler
999996,-34.886879,-56.188048,Uruguay,Montevideo,Mar del Plata,1.0,2.0,1.0,62.000000,62.0,600.0,Departamento,Alquiler
999997,-34.891367,-56.098215,Uruguay,Montevideo,Mar del Plata,4.0,3.0,2.0,514.889481,85.0,1152.0,Casa,Alquiler
999998,-34.462092,-57.842109,Uruguay,Colonia,Mar del Plata,5.0,3.0,1.0,135.000000,135.0,840.0,Casa,Alquiler


In [23]:
df.drop(columns='l3', inplace= True)

In [24]:
df.head()

Unnamed: 0,lat,lon,l1,l2,rooms,bedrooms,bathrooms,surface_total,surface_covered,price,property_type,operation_type
23,-34.400188,-58.724054,Argentina,Bs.As. G.B.A. Zona Norte,4.0,2.0,1.0,514.889481,98.0,0.0,Otro,Alquiler temporal
28,-24.825768,-65.470791,Argentina,Salta,3.0,2.0,1.0,324.0,324.0,18000.0,Lote,Venta
29,-38.982898,-68.350754,Argentina,Neuquén,3.0,2.0,1.0,250.0,250.0,7020.0,Lote,Venta
30,-38.948296,-68.265567,Argentina,Neuquén,3.0,2.0,1.0,360.0,360.0,14850.0,Lote,Venta
31,-34.602164,-58.37503,Argentina,Capital Federal,3.0,2.0,2.0,160.0,160.0,450.0,Oficina,Alquiler


In [25]:
df_1.drop(columns = 'l3', inplace =True)

In [26]:
df_1 = pd.get_dummies(df_1, columns=['l1', 'l2', 'property_type', 'operation_type'])

In [27]:
df_1.head()

Unnamed: 0,lat,lon,rooms,bedrooms,bathrooms,surface_total,surface_covered,price,l1_Argentina,l1_Brasil,...,property_type_Departamento,property_type_Depósito,property_type_Local comercial,property_type_Lote,property_type_Oficina,property_type_Otro,property_type_PH,operation_type_Alquiler,operation_type_Alquiler temporal,operation_type_Venta
23,-34.400188,-58.724054,4.0,2.0,1.0,514.889481,98.0,0.0,1,0,...,0,0,0,0,0,1,0,0,1,0
28,-24.825768,-65.470791,3.0,2.0,1.0,324.0,324.0,18000.0,1,0,...,0,0,0,1,0,0,0,0,0,1
29,-38.982898,-68.350754,3.0,2.0,1.0,250.0,250.0,7020.0,1,0,...,0,0,0,1,0,0,0,0,0,1
30,-38.948296,-68.265567,3.0,2.0,1.0,360.0,360.0,14850.0,1,0,...,0,0,0,1,0,0,0,0,0,1
31,-34.602164,-58.37503,3.0,2.0,2.0,160.0,160.0,450.0,1,0,...,0,0,0,0,1,0,0,1,0,0


In [28]:
## Model Machine Learning

In [67]:
# Menentukan Target dan features
X = df_1.drop(columns = ['price'])
y = df_1['price']

In [68]:
# Splitting Data train dan test
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size = .80, random_state = 42)

In [69]:
## def function modelling dan hasil evaluation matrix
def Eva_Matrix_DF(model, X_train, X_test, y_train, y_test, Nama):
    Model = model.fit(X_train, y_train)
    y_pred_tr = Model.predict(X_train)
    R2_tr = r2_score(y_train, y_pred_tr)
    MAE_tr = mean_absolute_error(y_train, y_pred_tr)
    MSE_tr = mean_squared_error(y_train, y_pred_tr)
    RMSE_tr = np.sqrt(MSE_tr)
    y_pred_ts = Model.predict(X_test)
    R2_ts = r2_score(y_test, y_pred_ts)
    MAE_ts = mean_absolute_error(y_test, y_pred_ts)
    MSE_ts = mean_squared_error(y_test, y_pred_ts)
    RMSE_ts = np.sqrt(MSE_ts)
    data = {
    "Training " + Nama : [R2_tr, MAE_tr, MSE_tr, RMSE_tr],
    "Testing " + Nama: [R2_ts, MAE_ts, MSE_ts, RMSE_ts]}
    df = pd.DataFrame(data = data, index = ['R2', 'MAE', 'MSE', 'RMSE'])
    return df

In [70]:
# Regresi menggunakan Linear regression
df_1_LinReg = Eva_Matrix_DF(LinearRegression(fit_intercept=False), X_train, X_test, y_train, y_test, "LinReg")

In [71]:
df_1_LinReg

Unnamed: 0,Training LinReg,Testing LinReg
R2,0.0001878,0.1177324
MAE,146762.7,132648.4
MSE,130141000000000.0,140227700000.0
RMSE,11407940.0,374469.8


In [72]:
# Regresi menggunakan Ridge
df_1_Ridge = Eva_Matrix_DF(Ridge(), X_train, X_test, y_train, y_test, "Ridge")

In [75]:
df_1_Ridge

Unnamed: 0,Training Ridge,Testing Ridge
R2,0.0001878,0.1177314
MAE,146760.8,132646.3
MSE,130141000000000.0,140227800000.0
RMSE,11407940.0,374470.0


In [76]:
# Regresi menggunakan Lasso
df_1_Lasso = Eva_Matrix_DF(Lasso(), X_train, X_test, y_train, y_test, "Lasso")

In [77]:
df_1_Lasso

Unnamed: 0,Training Lasso,Testing Lasso
R2,0.0001877986,0.117709
MAE,146752.9,132637.4
MSE,130141000000000.0,140231400000.0
RMSE,11407940.0,374474.8


In [78]:
# Regresi menggunakan ElasticNet
df_1_ElasticNet = Eva_Matrix_DF(ElasticNet(), X_train, X_test, y_train, y_test, "Lasso")
df_1_ElasticNet

Unnamed: 0,Training Lasso,Testing Lasso
R2,0.0001343327,0.08946932
MAE,143280.1,129516.3
MSE,130148000000000.0,144719800000.0
RMSE,11408240.0,380420.5


In [79]:
pd.concat([df_1_LinReg, df_1_Ridge, df_1_Lasso, df_1_ElasticNet], axis = 1).T.sort_values('R2', ascending =False)

Unnamed: 0,R2,MAE,MSE,RMSE
Testing LinReg,0.117732,132648.414527,140227700000.0,374469.8
Testing Ridge,0.117731,132646.343631,140227800000.0,374470.0
Testing Lasso,0.117709,132637.421856,140231400000.0,374474.8
Testing Lasso,0.089469,129516.340275,144719800000.0,380420.5
Training LinReg,0.000188,146762.703019,130141000000000.0,11407940.0
Training Ridge,0.000188,146760.816491,130141000000000.0,11407940.0
Training Lasso,0.000188,146752.86327,130141000000000.0,11407940.0
Training Lasso,0.000134,143280.082981,130148000000000.0,11408240.0


## Feature Engineering

In [43]:
## Scalling

In [46]:
scaler = MinMaxScaler()
scaler.fit(X_train) # preprocess fit
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [45]:
## mencoba regresi dengan Lasso

In [47]:
# Regresi menggunakan Lasso
df_scl_Lasso = Eva_Matrix_DF(Lasso(), X_train_scaled, X_test_scaled, y_train, y_test, "Lasso")

In [48]:
df_scl_Lasso

Unnamed: 0,Training Lasso,Testing Lasso
R2,0.0001881156,0.1245059
MAE,146708.4,132549.4
MSE,129939100000000.0,128081700000.0
RMSE,11399080.0,357885.1


In [49]:
## Polynomial Regression
poly = PolynomialFeatures(degree=3, include_bias=False)
yeo_pow = PowerTransformer(method='yeo-johnson')

In [53]:
df_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 963098 entries, 23 to 999999
Data columns (total 68 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   lat                               963098 non-null  float64
 1   lon                               963098 non-null  float64
 2   rooms                             963098 non-null  float64
 3   bedrooms                          963098 non-null  float64
 4   bathrooms                         963098 non-null  float64
 5   surface_total                     963098 non-null  float64
 6   surface_covered                   963098 non-null  float64
 7   price                             963098 non-null  float64
 8   l1_Argentina                      963098 non-null  uint8  
 9   l1_Brasil                         963098 non-null  uint8  
 10  l1_Estados Unidos                 963098 non-null  uint8  
 11  l1_Uruguay                        963098 non-null  

In [52]:
num_col = df[['lat', 'lon', 'surface_total', 'surface_covered']]

In [63]:
n = df_1[df_1['price'] == 0]
n

Unnamed: 0,lat,lon,rooms,bedrooms,bathrooms,surface_total,surface_covered,price,l1_Argentina,l1_Brasil,...,property_type_Departamento,property_type_Depósito,property_type_Local comercial,property_type_Lote,property_type_Oficina,property_type_Otro,property_type_PH,operation_type_Alquiler,operation_type_Alquiler temporal,operation_type_Venta
23,-34.400188,-58.724054,4.0,2.0,1.0,514.889481,98.000000,0.0,1,0,...,0,0,0,0,0,1,0,0,1,0
430,-38.061534,-57.547720,3.0,2.0,1.0,514.889481,6123.054086,0.0,1,0,...,0,0,0,0,0,0,0,0,1,0
1699,-34.790048,-59.256701,3.0,2.0,1.0,514.889481,6123.054086,0.0,1,0,...,0,0,0,1,0,0,0,0,0,1
1990,-40.154585,-71.354476,2.0,1.0,1.0,514.889481,6123.054086,0.0,1,0,...,1,0,0,0,0,0,0,0,0,1
4930,-34.578272,-58.441348,2.0,1.0,1.0,514.889481,6123.054086,0.0,1,0,...,1,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
996869,-34.577712,-58.436266,3.0,2.0,1.0,91.000000,78.000000,0.0,1,0,...,1,0,0,0,0,0,0,0,0,1
996870,-34.577712,-58.436266,3.0,2.0,1.0,91.000000,78.000000,0.0,1,0,...,1,0,0,0,0,0,0,0,0,1
996875,-26.826181,-65.314981,3.0,2.0,2.0,120.000000,120.000000,0.0,1,0,...,1,0,0,0,0,0,0,1,0,0
996879,-34.790048,-59.256701,6.0,3.0,1.0,514.889481,6123.054086,0.0,1,0,...,0,0,0,0,0,0,1,1,0,0


In [66]:
df_1.drop(n.index, inplace = True)