In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)


* **Let's first read our dataset and take a quick look at what we have.**

In [None]:
# Load the Future50 restaurant ranking table from the Kaggle input folder.
future50_csv = '../input/restaurant-business-rankings-2020/Future50.csv'
data = pd.read_csv(future50_csv)

In [None]:
# Preview the first rows to check the column names and value formats.
data.head()

In [None]:
# Show column dtypes, non-null counts and memory usage.
data.info()

In [None]:
# List the column labels of the loaded frame.
data.columns

In [None]:
# Summary statistics (count, mean, std, quartiles); by default pandas reports numeric columns only.
data.describe()

In [None]:
# Correlation map of the numeric columns.
# The frame also holds text columns (the notebook filters Franchising on 'Yes'),
# and DataFrame.corr() raises on non-numeric data in pandas >= 2.0, so select
# the numeric columns explicitly before computing correlations.
numeric_data = data.select_dtypes(include='number')

f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(numeric_data.corr(), annot=True, linewidths=.5, fmt='.1f', ax=ax)

plt.show()

In [None]:
# Units vs Sales
# seaborn >= 0.12 accepts only `data` positionally, so the x and y vectors must
# be passed by keyword. Drawing on `ax` explicitly keeps the plot on the figure
# created here rather than on whatever axes happen to be current.
f, ax = plt.subplots(figsize=(8, 8))
sns.violinplot(x=data['Units'], y=data['Sales'], ax=ax)

plt.show()

In [None]:
# Colour each restaurant by franchising status: red = franchised, green = not.
# The column stores 'Yes'/'No' (the same value the later filter uses), so the
# comparison must be against 'Yes'; comparing against 'True' never matches and
# paints every point green.
color_list = ['red' if flag == 'Yes' else 'green' for flag in data.loc[:, 'Franchising']]

# Pairwise scatter plots of every column except Franchising, with histograms
# on the diagonal. '*' is a valid matplotlib marker; the previous '&' is not
# and raises a ValueError.
pd.plotting.scatter_matrix(data.loc[:, data.columns != 'Franchising'],
                           c=color_list,
                           figsize=[15, 15],
                           diagonal='hist',
                           alpha=0.5,
                           s=200,
                           marker='*',
                           edgecolor="black")
plt.show()


In [None]:
# Bar chart of how many rows fall into each Franchising category,
# followed by the same counts as a table (displayed as the cell output).
franchising = data['Franchising']
sns.countplot(data=data, x="Franchising")
franchising.value_counts()

In [None]:
# Keep only the franchised restaurants; Units is the feature and Sales the target.
is_franchised = data['Franchising'] == 'Yes'
data1 = data.loc[is_franchised]

# Column vectors of shape (n_samples, 1).
x = data1['Units'].to_numpy(copy=True).reshape(-1, 1)
y = data1['Sales'].to_numpy(copy=True).reshape(-1, 1)

# Scatter of Sales against Units for the franchised subset.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x, y)
ax.set_xlabel('Units')
ax.set_ylabel('Sales')
plt.show()

# **Linear Regression**

In [None]:
# Ordinary least-squares fit of Sales on Units, using the x / y column vectors
# prepared by the previous cell.
from sklearn.linear_model import LinearRegression

# Evenly spaced Units values (np.linspace default: 50 points) spanning the observed range.
predict_space = np.linspace(x.min(), x.max()).reshape(-1, 1)

# fit() returns the estimator itself, so construction and fitting can be chained.
linear_reg = LinearRegression().fit(x, y)

# Model output along the prediction grid.
predicted = linear_reg.predict(predict_space)

# Regression line drawn together with the observed points.
plt.plot(predict_space, predicted, color='black', linewidth=3)
plt.scatter(x=x, y=y)
plt.xlabel('Units')
plt.ylabel('Sales')
plt.show()

# **Polynomial Linear Regression**

In [None]:
# Use every restaurant (not only the franchised ones) from here on.
x = data["Units"].values.reshape(-1, 1)
y = data["Sales"].values.reshape(-1, 1)
plt.scatter(x, y, color="green")

# Linear fit for comparison: predict_space / predicted come from the
# linear-regression cell, which was fitted on the franchised subset only.
plt.plot(predict_space, predicted, color='green', linewidth=3, label='linear')

# polynomial regression: y = b0 + b1*x + b2*x^2 + b3*x^3 + ... + bn*x^n
from sklearn.preprocessing import PolynomialFeatures
polynomial_regression = PolynomialFeatures(degree=10)

# Expand the single Units column into its powers 0..10.
x_polynomial = polynomial_regression.fit_transform(x)

# fit
linear_regression2 = LinearRegression()
linear_regression2.fit(x_polynomial, y)

# Predictions at the observed Units values, in the frame's row order.
y_head2 = linear_regression2.predict(x_polynomial)

# plt.plot joins points in the order given; the rows are not sorted by Units,
# so sort before drawing, otherwise the curve zig-zags back and forth.
order = np.argsort(x[:, 0])
plt.plot(x[order], y_head2[order], color="red", label="poly")
plt.xlabel("Units")
plt.ylabel("Sales")
plt.legend()
plt.show()


# **Decision Tree Regression**

In [None]:
# Feature (Units) and target (Sales) as column vectors over all rows.
x = data["Units"].values.reshape(-1, 1)
y = data["Sales"].values.reshape(-1, 1)

# decision tree regression
from sklearn.tree import DecisionTreeRegressor
tree_reg = DecisionTreeRegressor()
tree_reg.fit(x, y)

# Dense grid over the observed Units range so the step-shaped prediction is visible.
# x.min() / x.max() give plain scalars; the builtin min(x) / max(x) return
# 1-element arrays, and using those as np.arange bounds relies on implicit
# array-to-scalar conversion, which recent NumPy versions deprecate.
x_ = np.arange(x.min(), x.max(), 0.01).reshape(-1, 1)
y_head = tree_reg.predict(x_)

plt.scatter(x, y, color="black")
plt.plot(x_, y_head, color="purple")
plt.xlabel("Units")
plt.ylabel("Sales")
plt.show()


# **Random Forest Regression**

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Forest of 100 trees with a fixed seed, fitted on the x / y arrays defined by
# an earlier cell. y is a column vector there; scikit-learn expects a 1-D target
# here and emits a DataConversionWarning otherwise, so flatten it.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(x, y.ravel())

# To query a single value, pass a 2-D array, e.g. rf.predict([[7.8]]).

# Dense grid over the observed Units range; scalar bounds avoid handing
# 1-element arrays to np.arange.
x_ = np.arange(x.min(), x.max(), 0.01).reshape(-1, 1)
y_head = rf.predict(x_)

# visualize
plt.scatter(x, y, color="red")
plt.plot(x_, y_head, color="purple")
plt.xlabel("Units")
plt.ylabel("Sales")
plt.show()

# **R-Square**

**R-Square with Random Forest Regression**

In [None]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Refit a 100-tree forest (seed 30) on the x / y arrays from the earlier cells.
# The target is flattened to 1-D because scikit-learn warns when given a
# column vector here.
rf = RandomForestRegressor(n_estimators=100, random_state=30)
rf.fit(x, y.ravel())

# Predictions on the same rows the model was trained on.
y_head = rf.predict(x)

# NOTE: this R^2 is computed on the training data, so it is an optimistic
# estimate of fit quality, not a measure of generalisation.
print("r_score:", r2_score(y, y_head))
