In [1]:
from sklearn.model_selection import cross_validation
import lightgbm as lgb
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plot


# load the dataset (local path)
url = "Desktop/Capstone/data.csv"
# feature names
features = ["MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Jitter(%)","MDVP:Jitter(Abs)","MDVP:RAP","MDVP:PPQ","Jitter:DDP","MDVP:Shimmer","MDVP:Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","MDVP:APQ","Shimmer:DDA","NHR","HNR","RPDE","DFA","spread1","spread2","D2","PPE","status"]
dataset = pandas.read_csv(url, names = features)
plot.show()
# store the dataset as an array for easier processing
array = dataset.values
scaler = MinMaxScaler(feature_range=(0,1))
scaled = scaler.fit_transform(array)
# X stores feature values
X = scaled[:,0:22]
# Y stores "answers", the flower species / class (every row, 4th column)
Y = scaled[:,22]
validation_size = 0.25
# randomize which part of the data is training and which part is validation
seed = 7
# split dataset into training set (75%) and validation set (25%)
X_train, X_validation, Y_train, Y_validation = cross_validation.train_test_split(X, Y, test_size = validation_size, random_state = seed)

# 10-fold cross validation to estimate accuracy (split data into 10 parts; use 9 parts to train and 1 for test)
num_folds = 10
num_instances = len(X_train)
seed = 7
# use the 'accuracy' metric to evaluate models (correct / total)
scoring = 'accuracy'

# algorithms / models
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('NN', MLPClassifier(solver='lbfgs')))
models.append(('NB', GaussianNB()))
models.append(('GB', GradientBoostingClassifier(n_estimators=10000)))
#models.append(('LGBM',lgb()))

# evaluate each algorithm / model
results = []
names = []
accuracy = []
mat_coef= []
print("Scores for each algorithm:")
for name, model in models:
    kfold = cross_validation.KFold(int n = num_instances, n_folds = num_folds, random_state = seed)
    cv_results = cross_validation.cross_val_score(model, X_train, Y_train, cv = kfold, scoring = scoring)
    results.append(cv_results)
    names.append(name)
    model.fit(X_train, Y_train)
    predictions = model.predict(X_validation)
    accuracy.append(accuracy_score(Y_validation, predictions)*100)
    mat_coef.append(matthews_corrcoef(Y_validation, predictions))
    if name =="DT":
        tree.export_graphviz(model, out_file="tree.dot")
    print('*******************',name,'*******************')
    print("Confusion matrix for",name,'\n',confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))
    print('Accuracy Score:',accuracy_score(Y_validation, predictions)*100)
    print('Matthews Correlation Coeficient:',matthews_corrcoef(Y_validation, predictions),'\n')


plot.figure(figsize=(10,5))
plot.subplot(121)
plot.bar(names,accuracy)
plot.xlabel('Algorithm names')
plot.ylabel('Accuracy in %')
plot.subplot(122)
plot.bar(names,mat_coef)
plot.xlabel('Algorithm names')
plot.ylabel('Matthews Correlation Coeficient')
plot.suptitle("Comparison of the Algorithm used")
plot.show()

SyntaxError: invalid syntax (<ipython-input-1-ca87fc20c953>, line 64)

In [2]:
from sklearn.model_selection import cross_validation
import lightgbm as lgb
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plot


# load the dataset (local path)
url = "Desktop/Capstone/data.csv"
# feature names
features = ["MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Jitter(%)","MDVP:Jitter(Abs)","MDVP:RAP","MDVP:PPQ","Jitter:DDP","MDVP:Shimmer","MDVP:Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","MDVP:APQ","Shimmer:DDA","NHR","HNR","RPDE","DFA","spread1","spread2","D2","PPE","status"]
dataset = pandas.read_csv(url, names = features)
plot.show()
# store the dataset as an array for easier processing
array = dataset.values
scaler = MinMaxScaler(feature_range=(0,1))
scaled = scaler.fit_transform(array)
# X stores feature values
X = scaled[:,0:22]
# Y stores "answers", the flower species / class (every row, 4th column)
Y = scaled[:,22]
validation_size = 0.25
# randomize which part of the data is training and which part is validation
seed = 7
# split dataset into training set (75%) and validation set (25%)
X_train, X_validation, Y_train, Y_validation = cross_validation.train_test_split(X, Y, test_size = validation_size, random_state = seed)

# 10-fold cross validation to estimate accuracy (split data into 10 parts; use 9 parts to train and 1 for test)
num_folds = 10
num_instances = len(X_train)
seed = 7
# use the 'accuracy' metric to evaluate models (correct / total)
scoring = 'accuracy'

# algorithms / models
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('NN', MLPClassifier(solver='lbfgs')))
models.append(('NB', GaussianNB()))
models.append(('GB', GradientBoostingClassifier(n_estimators=10000)))
#models.append(('LGBM',lgb()))

# evaluate each algorithm / model
results = []
names = []
accuracy = []
mat_coef= []
print("Scores for each algorithm:")
for name, model in models:
    kfold = cross_validation.KFold(n = num_instances, n_folds = num_folds, random_state = seed)
    cv_results = cross_validation.cross_val_score(model, X_train, Y_train, cv = kfold, scoring = scoring)
    results.append(cv_results)
    names.append(name)
    model.fit(X_train, Y_train)
    predictions = model.predict(X_validation)
    accuracy.append(accuracy_score(Y_validation, predictions)*100)
    mat_coef.append(matthews_corrcoef(Y_validation, predictions))
    if name =="DT":
        tree.export_graphviz(model, out_file="tree.dot")
    print('*******************',name,'*******************')
    print("Confusion matrix for",name,'\n',confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))
    print('Accuracy Score:',accuracy_score(Y_validation, predictions)*100)
    print('Matthews Correlation Coeficient:',matthews_corrcoef(Y_validation, predictions),'\n')


plot.figure(figsize=(10,5))
plot.subplot(121)
plot.bar(names,accuracy)
plot.xlabel('Algorithm names')
plot.ylabel('Accuracy in %')
plot.subplot(122)
plot.bar(names,mat_coef)
plot.xlabel('Algorithm names')
plot.ylabel('Matthews Correlation Coeficient')
plot.suptitle("Comparison of the Algorithm used")
plot.show()

ImportError: cannot import name 'cross_validation'

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import Kfold
import lightgbm as lgb
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plot


# load the dataset (local path)
url = "Desktop/Capstone/data.csv"
# feature names
features = ["MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Jitter(%)","MDVP:Jitter(Abs)","MDVP:RAP","MDVP:PPQ","Jitter:DDP","MDVP:Shimmer","MDVP:Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","MDVP:APQ","Shimmer:DDA","NHR","HNR","RPDE","DFA","spread1","spread2","D2","PPE","status"]
dataset = pandas.read_csv(url, names = features)
plot.show()
# store the dataset as an array for easier processing
array = dataset.values
scaler = MinMaxScaler(feature_range=(0,1))
scaled = scaler.fit_transform(array)
# X stores feature values
X = scaled[:,0:22]
# Y stores "answers", the flower species / class (every row, 4th column)
Y = scaled[:,22]
validation_size = 0.25
# randomize which part of the data is training and which part is validation
seed = 7
# split dataset into training set (75%) and validation set (25%)
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size = validation_size, random_state = seed)

# 10-fold cross validation to estimate accuracy (split data into 10 parts; use 9 parts to train and 1 for test)
num_folds = 10
num_instances = len(X_train)
seed = 7
# use the 'accuracy' metric to evaluate models (correct / total)
scoring = 'accuracy'

# algorithms / models
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('NN', MLPClassifier(solver='lbfgs')))
models.append(('NB', GaussianNB()))
models.append(('GB', GradientBoostingClassifier(n_estimators=10000)))
#models.append(('LGBM',lgb()))

# evaluate each algorithm / model
results = []
names = []
accuracy = []
mat_coef= []
print("Scores for each algorithm:")
for name, model in models:
    kfold = KFold(int n = num_instances, n_folds = num_folds, random_state = seed)
    cv_results = cross_val_score(model, X_train, Y_train, cv = kfold, scoring = scoring)
    results.append(cv_results)
    names.append(name)
    model.fit(X_train, Y_train)
    predictions = model.predict(X_validation)
    accuracy.append(accuracy_score(Y_validation, predictions)*100)
    mat_coef.append(matthews_corrcoef(Y_validation, predictions))
    if name =="DT":
        tree.export_graphviz(model, out_file="tree.dot")
    print('*******************',name,'*******************')
    print("Confusion matrix for",name,'\n',confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))
    print('Accuracy Score:',accuracy_score(Y_validation, predictions)*100)
    print('Matthews Correlation Coeficient:',matthews_corrcoef(Y_validation, predictions),'\n')


plot.figure(figsize=(10,5))
plot.subplot(121)
plot.bar(names,accuracy)
plot.xlabel('Algorithm names')
plot.ylabel('Accuracy in %')
plot.subplot(122)
plot.bar(names,mat_coef)
plot.xlabel('Algorithm names')
plot.ylabel('Matthews Correlation Coeficient')
plot.suptitle("Comparison of the Algorithm used")
plot.show()

SyntaxError: invalid syntax (<ipython-input-3-46e9022c9635>, line 66)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import Kfold
import lightgbm as lgb
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plot


# load the dataset (local path)
url = "Desktop/Capstone/data.csv"
# feature names
features = ["MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Jitter(%)","MDVP:Jitter(Abs)","MDVP:RAP","MDVP:PPQ","Jitter:DDP","MDVP:Shimmer","MDVP:Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","MDVP:APQ","Shimmer:DDA","NHR","HNR","RPDE","DFA","spread1","spread2","D2","PPE","status"]
dataset = pandas.read_csv(url, names = features)
plot.show()
# store the dataset as an array for easier processing
array = dataset.values
scaler = MinMaxScaler(feature_range=(0,1))
scaled = scaler.fit_transform(array)
# X stores feature values
X = scaled[:,0:22]
# Y stores "answers", the flower species / class (every row, 4th column)
Y = scaled[:,22]
validation_size = 0.25
# randomize which part of the data is training and which part is validation
seed = 7
# split dataset into training set (75%) and validation set (25%)
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size = validation_size, random_state = seed)

# 10-fold cross validation to estimate accuracy (split data into 10 parts; use 9 parts to train and 1 for test)
num_folds = 10
num_instances = len(X_train)
seed = 7
# use the 'accuracy' metric to evaluate models (correct / total)
scoring = 'accuracy'

# algorithms / models
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('NN', MLPClassifier(solver='lbfgs')))
models.append(('NB', GaussianNB()))
models.append(('GB', GradientBoostingClassifier(n_estimators=10000)))
#models.append(('LGBM',lgb()))

# evaluate each algorithm / model
results = []
names = []
accuracy = []
mat_coef= []
print("Scores for each algorithm:")
for name, model in models:
    kfold = KFold(num_instances=10, n_folds = num_folds, random_state = seed)
    cv_results = cross_val_score(model, X_train, Y_train, cv = kfold, scoring = scoring)
    results.append(cv_results)
    names.append(name)
    model.fit(X_train, Y_train)
    predictions = model.predict(X_validation)
    accuracy.append(accuracy_score(Y_validation, predictions)*100)
    mat_coef.append(matthews_corrcoef(Y_validation, predictions))
    if name =="DT":
        tree.export_graphviz(model, out_file="tree.dot")
    print('*******************',name,'*******************')
    print("Confusion matrix for",name,'\n',confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))
    print('Accuracy Score:',accuracy_score(Y_validation, predictions)*100)
    print('Matthews Correlation Coeficient:',matthews_corrcoef(Y_validation, predictions),'\n')


plot.figure(figsize=(10,5))
plot.subplot(121)
plot.bar(names,accuracy)
plot.xlabel('Algorithm names')
plot.ylabel('Accuracy in %')
plot.subplot(122)
plot.bar(names,mat_coef)
plot.xlabel('Algorithm names')
plot.ylabel('Matthews Correlation Coeficient')
plot.suptitle("Comparison of the Algorithm used")
plot.show()

ImportError: cannot import name 'Kfold'

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import lightgbm as lgb
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plot


# load the dataset (local path)
url = "Desktop/Capstone/data.csv"
# feature names
features = ["MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Jitter(%)","MDVP:Jitter(Abs)","MDVP:RAP","MDVP:PPQ","Jitter:DDP","MDVP:Shimmer","MDVP:Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","MDVP:APQ","Shimmer:DDA","NHR","HNR","RPDE","DFA","spread1","spread2","D2","PPE","status"]
dataset = pandas.read_csv(url, names = features)
plot.show()
# store the dataset as an array for easier processing
array = dataset.values
scaler = MinMaxScaler(feature_range=(0,1))
scaled = scaler.fit_transform(array)
# X stores feature values
X = scaled[:,0:22]
# Y stores "answers", the flower species / class (every row, 4th column)
Y = scaled[:,22]
validation_size = 0.25
# randomize which part of the data is training and which part is validation
seed = 7
# split dataset into training set (75%) and validation set (25%)
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size = validation_size, random_state = seed)

# 10-fold cross validation to estimate accuracy (split data into 10 parts; use 9 parts to train and 1 for test)
num_folds = 10
num_instances = len(X_train)
seed = 7
# use the 'accuracy' metric to evaluate models (correct / total)
scoring = 'accuracy'

# algorithms / models
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('NN', MLPClassifier(solver='lbfgs')))
models.append(('NB', GaussianNB()))
models.append(('GB', GradientBoostingClassifier(n_estimators=10000)))
#models.append(('LGBM',lgb()))

# evaluate each algorithm / model
results = []
names = []
accuracy = []
mat_coef= []
print("Scores for each algorithm:")
for name, model in models:
    kfold = KFold(num_instances=10, n_folds = num_folds, random_state = seed)
    cv_results = cross_val_score(model, X_train, Y_train, cv = kfold, scoring = scoring)
    results.append(cv_results)
    names.append(name)
    model.fit(X_train, Y_train)
    predictions = model.predict(X_validation)
    accuracy.append(accuracy_score(Y_validation, predictions)*100)
    mat_coef.append(matthews_corrcoef(Y_validation, predictions))
    if name =="DT":
        tree.export_graphviz(model, out_file="tree.dot")
    print('*******************',name,'*******************')
    print("Confusion matrix for",name,'\n',confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))
    print('Accuracy Score:',accuracy_score(Y_validation, predictions)*100)
    print('Matthews Correlation Coeficient:',matthews_corrcoef(Y_validation, predictions),'\n')


plot.figure(figsize=(10,5))
plot.subplot(121)
plot.bar(names,accuracy)
plot.xlabel('Algorithm names')
plot.ylabel('Accuracy in %')
plot.subplot(122)
plot.bar(names,mat_coef)
plot.xlabel('Algorithm names')
plot.ylabel('Matthews Correlation Coeficient')
plot.suptitle("Comparison of the Algorithm used")
plot.show()

NameError: name 'pandas' is not defined

In [6]:
import pandas
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import lightgbm as lgb
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
import matplotlib.pyplot as plot


# load the dataset (local path)
url = "Desktop/Capstone/data.csv"
# feature names
features = ["MDVP:Fo(Hz)","MDVP:Fhi(Hz)","MDVP:Flo(Hz)","MDVP:Jitter(%)","MDVP:Jitter(Abs)","MDVP:RAP","MDVP:PPQ","Jitter:DDP","MDVP:Shimmer","MDVP:Shimmer(dB)","Shimmer:APQ3","Shimmer:APQ5","MDVP:APQ","Shimmer:DDA","NHR","HNR","RPDE","DFA","spread1","spread2","D2","PPE","status"]
dataset = pandas.read_csv(url, names = features)
plot.show()
# store the dataset as an array for easier processing
array = dataset.values
scaler = MinMaxScaler(feature_range=(0,1))
scaled = scaler.fit_transform(array)
# X stores feature values
X = scaled[:,0:22]
# Y stores "answers", the flower species / class (every row, 4th column)
Y = scaled[:,22]
validation_size = 0.25
# randomize which part of the data is training and which part is validation
seed = 7
# split dataset into training set (75%) and validation set (25%)
X_train, X_validation, Y_train, Y_validation = train_test_split(X, Y, test_size = validation_size, random_state = seed)

# 10-fold cross validation to estimate accuracy (split data into 10 parts; use 9 parts to train and 1 for test)
num_folds = 10
num_instances = len(X_train)
seed = 7
# use the 'accuracy' metric to evaluate models (correct / total)
scoring = 'accuracy'

# algorithms / models
models = []
models.append(('LR', LogisticRegression()))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('NN', MLPClassifier(solver='lbfgs')))
models.append(('NB', GaussianNB()))
models.append(('GB', GradientBoostingClassifier(n_estimators=10000)))
#models.append(('LGBM',lgb()))

# evaluate each algorithm / model
results = []
names = []
accuracy = []
mat_coef= []
print("Scores for each algorithm:")
for name, model in models:
    kfold = KFold(num_instances=10, n_folds = num_folds, random_state = seed)
    cv_results = cross_val_score(model, X_train, Y_train, cv = kfold, scoring = scoring)
    results.append(cv_results)
    names.append(name)
    model.fit(X_train, Y_train)
    predictions = model.predict(X_validation)
    accuracy.append(accuracy_score(Y_validation, predictions)*100)
    mat_coef.append(matthews_corrcoef(Y_validation, predictions))
    if name =="DT":
        tree.export_graphviz(model, out_file="tree.dot")
    print('*******************',name,'*******************')
    print("Confusion matrix for",name,'\n',confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))
    print('Accuracy Score:',accuracy_score(Y_validation, predictions)*100)
    print('Matthews Correlation Coeficient:',matthews_corrcoef(Y_validation, predictions),'\n')


plot.figure(figsize=(10,5))
plot.subplot(121)
plot.bar(names,accuracy)
plot.xlabel('Algorithm names')
plot.ylabel('Accuracy in %')
plot.subplot(122)
plot.bar(names,mat_coef)
plot.xlabel('Algorithm names')
plot.ylabel('Matthews Correlation Coeficient')
plot.suptitle("Comparison of the Algorithm used")
plot.show()

FileNotFoundError: File b'Desktop/Capstone/data.csv' does not exist

In [1]:
# -*- coding: utf-8 -*-
"""
Created on Sat May  9 19:34:46 2020

@author: chouh
"""

import lightgbm as lgb
import numpy as np
train_data = lgb.Dataset('Desktop/parkinsons.data')
data = np.random.rand(500, 10)  # 500 entities, each contains 10 features
label = np.random.randint(2, size=500)  # binary target
train_data = lgb.Dataset(data, label=label)
validation_data = train_data.create_valid('validation.svm')
train_data = lgb.Dataset(data, label=label, feature_name=['c1', 'c2', 'c3'], categorical_feature=['c3'])
w = np.random.rand(500, )
train_data = lgb.Dataset(data, label=label, weight=w)
param = {'num_leaves': 31, 'objective': 'binary'}
param['metric'] = 'auc'
param['metric'] = ['auc', 'binary_logloss']
num_round = 10
bst = lgb.train(param, train_data, num_round, valid_sets=[validation_data])
bst.save_model('model.txt')
json_model = bst.dump_model()
bst = lgb.Booster(model_file='model.txt')  # init model
lgb.cv(param, train_data, num_round, nfold=5)
data = np.random.rand(7, 10)
ypred = bst.predict(data)
ypred = bst.predict(data, num_iteration=bst.best_iteration)


LightGBMError: Cannot open data file validation.svm