In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns


from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)



In [None]:
# Load the rental listings into `df` and preview the first rows.
manhattan_csv = '../input/streeteasy-dataset/manhattan.csv'
df = pd.read_csv(manhattan_csv)
df.head()

In [None]:
# Print pandas' summary of the frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Two-feature design matrix (size and building age) and the rent target,
# both kept as DataFrames.
feature_cols = ['size_sqft', 'building_age_yrs']
target_cols = ['rent']
x = df[feature_cols]
y = df[target_cols]

In [None]:
# 80/20 train/test split with a fixed seed so the partition is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=6,
)

# Ordinary least squares on the training portion; `ols` is reused later for
# the 3D plane plot. fit() returns the estimator, which is shown as the cell
# output.
ols = LinearRegression()
ols.fit(x_train, y_train)

In [None]:
# Rent against number of bedrooms, coloured by neighborhood.
# The original passed sizes=(20, 200) without a `size` variable; seaborn only
# uses `sizes` to scale a mapped `size` column, so the argument had no effect
# and is dropped here.
fig, ax = plt.subplots(figsize=(12, 12))
sns.scatterplot(
    data=df,
    x='bedrooms',
    y='rent',
    hue='neighborhood',
    palette='bright',
    ax=ax,
)
ax.set_title('Rent by number of bedrooms')
plt.show()

In [None]:
# Interactive 3D scatter: rent, size and building age on the axes, coloured by
# neighborhood, with marker size scaled by rent.
fig = px.scatter_3d(
    df,
    x='rent',
    y='size_sqft',
    z='building_age_yrs',
    color='neighborhood',
    size='rent',
    size_max=20,
)
fig.show()

In [None]:
# Distribution of rent per neighborhood.
fig, ax = plt.subplots(figsize=(16, 12))
sns.boxplot(data=df, x='neighborhood', y='rent', ax=ax)
# Rotate the tick labels seaborn already assigned instead of overwriting them
# with df['neighborhood'].unique(): rewriting labels by hand can silently
# mislabel boxes if the category order ever differs, and it triggers
# matplotlib's fixed-formatter warning.
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Two stacked panels: a 2D histogram of rent vs. building age on top, and the
# autocorrelation plot of the rent column below.
fig = plt.figure(figsize=(16, 20))

ax = fig.add_subplot(2, 1, 1)
ax.hist2d(df['rent'], df['building_age_yrs'])
ax.set_xlabel('Rent', fontsize=20)
ax.set_ylabel('Building age', fontsize=20)

ax = fig.add_subplot(2, 1, 2)
# autocorrelation_plot draws on the current axes, i.e. the panel just added.
# NOTE(review): autocorrelation is computed over row order; confirm the rows
# have a meaningful ordering before interpreting this panel.
pd.plotting.autocorrelation_plot(df.rent)

plt.show()

In [None]:
# Regression fit of rent on size, one line per value of has_patio.
sns.lmplot(
    x='size_sqft',
    y='rent',
    hue='has_patio',
    data=df,
)
plt.show()

In [None]:
# Single-feature baseline: regress rent on apartment size only.
# Both arrays are column vectors of shape (n_rows, 1).
X = df[['size_sqft']].to_numpy()
y = df[['rent']].to_numpy()

# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)
y_new = model.predict(X)

In [None]:
# Features: every column from position 2 up to (but not including) the last
# two columns. Target: the single column at position 1, kept as a DataFrame.
x = df.iloc[:, 2:-2]
y = df.iloc[:, 1:2]

# Same proportions and seed as the earlier two-feature split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=6,
)

In [None]:
# Display the training feature frame produced by the split.
x_train

**Let's test this model on this flat: https://streeteasy.com/rental/2177438**
This apartment has the following features:
Features	Sonny’s Apartment
bedrooms	1
bathrooms	1
size_sqft	620 ft²
min_to_subway	16 min
floor	1
building_age_yrs	98 (built in 1920)
no_fee	1
has_roofdeck	0
has_washer_dryer	1
has_doorman	0
has_elevator	0
has_dishwasher	1
has_patio	1
has_gym	0


In [None]:
# Fit the multiple linear regression on the training split and predict rent
# for the held-out rows.
model = LinearRegression()
model.fit(x_train, y_train)
y_predict = model.predict(x_test)

# Feature values of the example apartment, assumed to follow x_train's column
# order (they match the feature list in the markdown cell before this one).
# The row is wrapped in a DataFrame carrying x_train's column names because the
# model was fitted on a DataFrame: predicting on a bare nested list drops the
# feature names and makes scikit-learn emit a "does not have valid feature
# names" warning. A length mismatch with x_train.columns fails here, loudly.
apartment = pd.DataFrame(
    [[1, 1, 620, 16, 1, 98, 1, 0, 1, 0, 0, 1, 1, 0]],
    columns=x_train.columns,
)

predict = model.predict(apartment)
predict

In [None]:
# Actual rent on the held-out rows against the model's prediction; points on
# the diagonal are perfect predictions.
plt.figure(figsize=(12,12))
plt.scatter(y_test, y_predict, alpha=0.4)
plt.xlabel("Prices: $Y_i$")
# Raw string: "\h" is not a valid Python escape sequence, so the non-raw form
# raises a DeprecationWarning/SyntaxWarning. The resulting text is unchanged.
plt.ylabel(r"Predicted prices: $\hat{Y}_i$")
plt.title("Actual Rent vs Predicted Rent")
plt.show()

In [None]:
# Fitted coefficients of `model`.
# NOTE(review): presumably one value per x_train column, in column order — confirm.
model.coef_

In [None]:
# Rent against two individual features, one panel each. Axis labels are set
# explicitly so the two panels can be told apart, and plain Series are passed
# instead of single-column DataFrames.
plt.figure(figsize=(12,12))

ax = plt.subplot(211)
ax.scatter(df['size_sqft'], df['rent'], alpha=0.4)
ax.set_xlabel('size_sqft')
ax.set_ylabel('rent')

ax = plt.subplot(212)
ax.scatter(df['min_to_subway'], df['rent'], alpha=0.4)
ax.set_xlabel('min_to_subway')
ax.set_ylabel('rent')

plt.tight_layout()
plt.show()

In [None]:
# R^2 of the fitted model on both splits. The training score alone overstates
# how well the model generalises, so the held-out score is shown next to it.
pd.Series(
    {
        'train': model.score(x_train, y_train),
        'test': model.score(x_test, y_test),
    },
    name='r_squared',
)

In [None]:
# 3D view of the two-feature model `ols`: training points plus the fitted
# regression plane over size 0-4500 and building age 0-140.
fig = plt.figure(1, figsize=(12, 14))

# Camera angles for the 3D view.
elev = 43.5
azim = -110

# Create the 3D axes through the figure: constructing Axes3D(fig, ...) directly
# no longer attaches the axes to the figure in current matplotlib releases,
# which leaves the plot blank.
ax = fig.add_subplot(111, projection='3d')
ax.view_init(elev=elev, azim=azim)

ax.scatter(
    x_train['size_sqft'],
    x_train['building_age_yrs'],
    np.ravel(y_train),
    c='k',
    marker='+',
)

# The four corners of the plane, as a DataFrame with the same column names
# `ols` was fitted on, so predict() does not warn about missing feature names.
# Row order: (0, 0), (0, 140), (4500, 0), (4500, 140), matching the 2x2 grids below.
plane_corners = pd.DataFrame(
    {
        'size_sqft': [0, 0, 4500, 4500],
        'building_age_yrs': [0, 140, 0, 140],
    }
)
ax.plot_surface(
    np.array([[0, 0], [4500, 4500]]),
    np.array([[0, 140], [0, 140]]),
    ols.predict(plane_corners).reshape((2, 2)),
    alpha=.7,
)

ax.set_xlabel('Size (ft$^2$)')
ax.set_ylabel('Building Age (Years)')
ax.set_zlabel('Rent ($)')

# Hide tick labels. The w_xaxis/w_yaxis/w_zaxis aliases are deprecated and
# removed in recent matplotlib; xaxis/yaxis/zaxis are the same objects.
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])
plt.show()