In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from scipy.stats.mstats import winsorize
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error as MSE

import statsmodels.api as sm
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from statsmodels.tools.eval_measures import mse, rmse
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,accuracy_score
from scipy.stats import jarque_bera
from scipy.stats import normaltest
import warnings
# Notebook-wide display and plotting configuration.
pd.set_option("display.max_columns", None)  # never truncate wide frames
# NOTE(review): this silences *every* warning, including scikit-learn
# deprecation notices -- consider narrowing the filter.
warnings.filterwarnings("ignore")
sns.set_style("whitegrid")

# Matplotlib font dictionaries: `baslik_font` for titles, `eksen_font` for axis labels.
baslik_font = dict(family='arial', color='darkred', weight='bold', size=13)
eksen_font = dict(family='arial', color='darkblue', weight='bold', size=10)

## with Regression Project

In [7]:
# Load the appliances-energy table from the file `clean_energy_appliances`
# (relative path, resolved against the current working directory).
df_energy_app = pd.read_csv("clean_energy_appliances")

In [8]:
# Predictor columns taken from the energy table.
features = [
    'lights',
    'T1', 'RH_1',
    'T2', 'RH_2',
    'T3', 'RH_3',
    'RH_6', 'RH_7', 'RH_8',
    'hour1', 'house_T',
    'tout_tdewpoint', 'hour*2',
    'hour_avg', 'low_consum',
    'high_consum',
]
X = df_energy_app[features]

# Binary target: 1 when the appliance consumption is below its overall mean,
# 0 otherwise.
y = df_energy_app["Appliances"]
y = y.lt(y.mean()).astype(int)

# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=40,
)

### KNN

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbours classifier on the binary consumption target.
knn_model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Mean accuracy on the held-out split.
knn_model.score(X_test, y_test)


0.8389057750759878

In [10]:
# RMSE of the hard 0/1 KNN predictions. With 0/1 labels every squared error
# is either 0 or 1, so the MSE is the misclassification rate and the RMSE is
# sqrt(1 - accuracy).
y_predict = knn_model.predict(X_test)
mse_predict = MSE(y_predict, y_test)
rmse_score = mse_predict ** 0.5
rmse_score

0.4013654505858871

### DecisionTreeRegressor

In [11]:
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor

# Depth-limited regression tree.
#
# The tree is trained on the existing train/test split, whose target is the
# binary "below mean consumption" indicator built when the split was created.
# An earlier version of this cell rebound `X` and `y` to the raw `Appliances`
# column without re-splitting; those assignments never reached the model and
# only suggested (wrongly) that the continuous target was being regressed, so
# they have been dropped.
dec_tree_reg = DecisionTreeRegressor(max_depth=4)

dec_tree_reg.fit(X_train, y_train)

DecisionTreeRegressor(max_depth=4)

In [12]:
from sklearn.metrics import mean_squared_error as MSE

# Evaluate the regression tree on the held-out rows: predict, take the mean
# squared error, then its square root.
y_predict = dec_tree_reg.predict(X_test)
mse_predict = MSE(y_predict, y_test)
rmse_score = mse_predict ** 0.5

In [13]:
# Display the RMSE computed for the decision tree in the previous cell.
rmse_score

0.30225000072695846

In [14]:
# For a scikit-learn regressor, score() is the coefficient of determination
# (R^2) on the given data -- not an accuracy.
dec_tree_reg.score(X_test,y_test)

0.6240737770822453

### Random Forest

In [16]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 30-tree random forest on the binary consumption target (seeded).
random_forest = RandomForestClassifier(
    n_estimators=30,
    random_state=111,
).fit(X_train, y_train)

# Hard class predictions and their RMSE against the 0/1 test labels.
y_tahmin = random_forest.predict(X_test)
rmse_score = MSE(y_test, y_tahmin) ** 0.5

In [17]:
# Display the RMSE computed for the random forest in the previous cell.
rmse_score

0.328485518040461

In [18]:
# Fraction of test rows whose random-forest prediction matches the label.
accuracy_score(y_test,y_tahmin)

0.89209726443769

### SVM

In [32]:
from sklearn.svm import SVR

# Support-vector regression with a linear kernel.
svr_reg = SVR(kernel="linear", C=1).fit(X_train, y_train)

# Work with plain arrays from here on (note: this rebinds the shared `y_test`).
y_test = np.asanyarray(y_test)
y_predict = np.asanyarray(svr_reg.predict(X_test))

rmse_linear = MSE(y_test, y_predict) ** 0.5

# Author's open questions (translated): "why didn't it compute the RMSE
# value?" and "it also raises an error for accuracy_score".
# NOTE(review): SVR returns continuous predictions, which is presumably why
# accuracy_score (a classification metric) rejects them -- confirm.
rmse_linear

0.34432484776794703

In [30]:
# R^2 (coefficient of determination) of `svr_reg` on the test split.
svr_reg.score(X_test,y_test)

0.5121270624560583

In [None]:
# parameter tuning

In [34]:
# Same experiment with an RBF kernel and a larger C (C=100).
svr_reg = SVR(kernel="rbf", C=100).fit(X_train, y_train)
y_predict = svr_reg.predict(X_test)

rmse_rbf = MSE(y_test, y_predict) ** 0.5
rmse_rbf

0.32553471372445014

In [35]:
# Same experiment with a polynomial kernel and C=10.
svr_reg = SVR(kernel="poly", C=10).fit(X_train, y_train)
y_predict = svr_reg.predict(X_test)

rmse_poly = MSE(y_test, y_predict) ** 0.5
rmse_poly

0.32992924098012255

## with Classification Project

In [36]:
from sklearn.metrics import classification_report, precision_recall_fscore_support
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
import os
from nltk.corpus import stopwords
from nltk.corpus import words
from nltk.stem import PorterStemmer

In [37]:
# Load the BBC text table from the file `clean_bbc` (relative path). Later
# cells read its `text` and `category` columns.
df_bbc = pd.read_csv("clean_bbc")

In [38]:
# Learn the bag-of-words vocabulary from the article texts
# (default CountVectorizer settings).
vectorizer = CountVectorizer()
vectorizer.fit(df_bbc.text)

CountVectorizer()

In [39]:
# Document-term count matrix: sparse first, then a dense copy.
text_vector = vectorizer.fit_transform(df_bbc.text)
text_vector_array = text_vector.toarray()

# Vocabulary terms in column order. `get_feature_names()` was removed in
# scikit-learn 1.2, so prefer `get_feature_names_out()` and fall back to the
# old method only on releases that predate it. The result is kept as a list,
# matching what the old method returned.
if hasattr(vectorizer, "get_feature_names_out"):
    features = vectorizer.get_feature_names_out().tolist()
else:
    features = vectorizer.get_feature_names()

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF representation of the article texts. min_df=0. and max_df=1. are
# document-frequency proportions, so no term is dropped for being too rare
# or too common.
Tfidf_Vector = TfidfVectorizer(min_df=0., max_df=1., use_idf=True)
Tfidf_Matrix = Tfidf_Vector.fit_transform(df_bbc.text)
Tfidf_Matrix = Tfidf_Matrix.toarray()  # densify the sparse result
print(Tfidf_Matrix)

# Vocabulary terms in column order. `get_feature_names()` was removed in
# scikit-learn 1.2, so prefer `get_feature_names_out()` and fall back to the
# old method only on releases that predate it.
if hasattr(Tfidf_Vector, "get_feature_names_out"):
    features = Tfidf_Vector.get_feature_names_out().tolist()
else:
    features = Tfidf_Vector.get_feature_names()

# One row per document, one column per term.
Tfidf_df = pd.DataFrame(Tfidf_Matrix, columns=features)
Tfidf_df.head()

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [None]:
# Classification inputs: dense TF-IDF matrix as features, article category
# as the label array.
y_cl = df_bbc["category"].values
X_cl = Tfidf_Matrix

In [None]:
# 80/20 hold-out split of the text data with a fixed seed. This rebinds the
# X_train / X_test / y_train / y_test names used by the energy models above.
X_train, X_test, y_train, y_test = train_test_split(
    X_cl,
    y_cl,
    test_size=0.2,
    random_state=111,
)

In [None]:
# Inspect the label array taken from the `category` column.
y_cl

### KNN

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier on the TF-IDF features.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Mean accuracy on the held-out documents.
knn_model.score(X_test, y_test)

### DecisionTreeClassifier

In [None]:
import scipy
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import pydotplus
import graphviz

In [None]:
# Entropy-based decision tree: at most 10 candidate features per split,
# depth capped at 32, seeded.
dec_tree_model = DecisionTreeClassifier(
    criterion='entropy', max_depth=32, max_features=10, random_state=1337
).fit(X_train, y_train)

y_predict = dec_tree_model.predict(X_test)

# Mean accuracy on the held-out documents.
dec_tree_model.score(X_test, y_test)

### LogisticRegression

In [None]:
# Multinomial (softmax) logistic regression fitted with L-BFGS.
# NOTE(review): the `multi_class` argument is deprecated in recent
# scikit-learn releases -- confirm against the installed version.
log_reg_model = LogisticRegression(
    multi_class="multinomial",
    solver="lbfgs",
).fit(X_train, y_train)

# Mean accuracy on the held-out documents.
log_reg_model.score(X_test, y_test)

### RandomForest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# 30-tree random forest on the TF-IDF features (seeded).
random_forest = RandomForestClassifier(
    n_estimators=30,
    random_state=111,
).fit(X_train, y_train)

y_predict = random_forest.predict(X_test)

# Mean accuracy on the held-out documents.
random_forest.score(X_test, y_test)

### SVC

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with a linear kernel, C=1.
svc = SVC(kernel="linear", C=1).fit(X_train, y_train)
y_predict = svc.predict(X_test)

# Share of held-out documents classified correctly.
accuracy_score(y_test, y_predict)

In [None]:
# Support-vector classifier with an RBF kernel, C=1.
svc = SVC(kernel="rbf", C=1).fit(X_train, y_train)
y_predict = svc.predict(X_test)

# Share of held-out documents classified correctly.
accuracy_score(y_test, y_predict)

In [None]:
# Support-vector classifier with a polynomial kernel, C=1.
svc = SVC(kernel="poly", C=1).fit(X_train, y_train)
y_predict = svc.predict(X_test)

# Share of held-out documents classified correctly.
accuracy_score(y_test, y_predict)