In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, recall_score, precision_score


# Load the dataset
file_path = "Khodshifte.xlsx"
df = pd.read_excel(file_path)

# Extract the features (from the third column to the last)
features = df.iloc[:, 0:]

# Step 1: Handle missing values (if any)
features = features.fillna(features.mean())

# Step 2: Encode categorical variables (if any)
label_encoder = LabelEncoder()
for column in features.select_dtypes(include=['object']).columns:
    features[column] = label_encoder.fit_transform(features[column])

# Step 3: Normalize numerical features
scaler = StandardScaler()
normalized_features = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

# Extract the output column (assuming it is the first column)
output_column = df.iloc[:, 0]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(normalized_features, output_column, test_size=0.2, random_state=42)

# Create a logistic regression model
logreg_model = LogisticRegression()

# Train the model on the training set
logreg_model.fit(X_train, y_train)

# Make predictions on the testing set
predictions = logreg_model.predict(X_test)

# Calculate and display the accuracy of the model
accuracy = accuracy_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)

print("Accuracy of the Logistic Regression model: {:.2f}%".format(accuracy * 100),"\n")

print("Mean squared error of the LogisticRegression model: {:.2f}%".format(mse),"\n")

mae = mean_absolute_error(y_test, predictions)
print("Mean absolute error of the LogisticRegression model: {:.2f}%".format(mae),"\n")

recall = recall_score(y_test, predictions.round(), average='weighted') * 100
print("Recall of the LogisticRegression model: {:.2f}%".format(recall),"\n")

precision = precision_score(y_test, predictions.round(), average='weighted') * 100
print("Precision of the LogisticRegression model: {:.2f}%".format(precision),"\n")

Accuracy of the Logistic Regression model: 71.43% 

Mean squared error of the LogisticRegression model: 7.14% 

Mean absolute error of the LogisticRegression model: 1.43% 

Recall of the LogisticRegression model: 71.43% 

Precision of the LogisticRegression model: 70.24% 



  features = features.fillna(features.mean())


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, recall_score, precision_score

# Load the Excel file
file_path = "Khodshifte.xlsx"  # Replace with your file path
data = pd.read_excel(file_path)

# Separate input features (from the second to the last column) and output variable (first column)
X = data.iloc[:, 0:]
y = data.iloc[:, 0]

# Step 1: Separate numeric and non-numeric columns
numeric_cols = X.select_dtypes(include=['number']).columns
non_numeric_cols = X.select_dtypes(exclude=['number']).columns

# Step 2: Handle missing values using SimpleImputer with different strategies for numeric and non-numeric columns
imputer_numeric = SimpleImputer(strategy='mean')
imputer_non_numeric = SimpleImputer(strategy='most_frequent')

X_numeric = pd.DataFrame(imputer_numeric.fit_transform(X[numeric_cols]), columns=numeric_cols)
X_non_numeric = pd.DataFrame(imputer_non_numeric.fit_transform(X[non_numeric_cols]), columns=non_numeric_cols)

# Combine the numeric and non-numeric columns
X = pd.concat([X_numeric, X_non_numeric], axis=1)

# Step 3: Normalize input features using StandardScaler
scaler = StandardScaler()
X[numeric_cols] = scaler.fit_transform(X[numeric_cols])

# Step 4: Encode non-numeric data using LabelEncoder
label_encoder = LabelEncoder()

for column in non_numeric_cols:
    X[column] = label_encoder.fit_transform(X[column])

# Step 5: Create a Naive Bayes model (Gaussian Naive Bayes for continuous features)
model = Pipeline([
    ('classifier', GaussianNB())
])

# Step 6: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 7: Train the model
model.fit(X_train, y_train)

# Step 8: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 9: Display the accuracy
accuracy = accuracy_score(y_test, y_pred)
mse = mean_squared_error(y_test,  y_pred)

print(f"Naive Bayes Model Accuracy: {accuracy * 100:.2f}%","\n")

print("Mean squared error of the Naive Bayes model: {:.2f}%".format(mse),"\n")

mae = mean_absolute_error(y_test, y_pred)
print("Mean absolute error of the Naive Bayes model: {:.2f}%".format(mae),"\n")

recall = recall_score(y_test, y_pred.round(), average='weighted') * 100
print("Recall of the Naive Bayes model: {:.2f}%".format(recall),"\n")

precision = precision_score(y_test, y_pred.round(), average='weighted') * 100
print("Precision of the Naive Bayes model: {:.2f}%".format(precision),"\n")

Naive Bayes Model Accuracy: 85.71% 

Mean squared error of the Naive Bayes model: 8.93% 

Mean absolute error of the Naive Bayes model: 1.07% 

Recall of the Naive Bayes model: 85.71% 

Precision of the Naive Bayes model: 82.65% 



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, recall_score, precision_score

# Load the dataset
file_path = "Khodshifte.xlsx"
df = pd.read_excel(file_path)

# Extract the features (from the second column to the last)
X = df.iloc[:, 0:]

# Extract the output column (assuming it is the first column)
y = df.iloc[:, 0]

# Step 1: Handle missing values (if any)
X = X.fillna(X.mean())

# Step 2: Encode categorical variables (if any)
label_encoder = LabelEncoder()

for column in X.select_dtypes(include=['object']).columns:
    X[column] = label_encoder.fit_transform(X[column])

# Step 3: Normalize numerical features
scaler = StandardScaler()
X_normalized = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

# Create an SVR model
svr_model = SVR()

# Train the model on the training set
svr_model.fit(X_train, y_train)

# Make predictions on the testing set
predictions = svr_model.predict(X_test)

# Calculate and display the accuracy of the model (using Mean Squared Error as an example)
mse = mean_squared_error(y_test, predictions)
accuracy = 1 - mse / y_test.var()
print("Accuracy of the SVR model: {:.2f}%".format(accuracy * 100),"\n")

print("Mean squared error of the SVR model: {:.2f}%".format(mse),"\n")

mae = mean_absolute_error(y_test, predictions)
print("Mean absolute error of the SVR model: {:.2f}%".format(mae),"\n")

recall = recall_score(y_test, predictions.round(), average='weighted') * 100
print("Recall of the SVR model: {:.2f}%".format(recall),"\n")

precision = precision_score(y_test, predictions.round(), average='weighted') * 100
print("Precision of the SVR model: {:.2f}%".format(precision),"\n")

Accuracy of the SVR model: 41.03% 

Mean squared error of the SVR model: 13.28% 

Mean absolute error of the SVR model: 2.67% 

Recall of the SVR model: 14.29% 

Precision of the SVR model: 42.86% 



  X = X.fillna(X.mean())
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, precision_score, recall_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import sys

# Load the dataset
file_path = "Khodshifte.xlsx"
df = pd.read_excel(file_path)

# Extract the features (from the second column to the last)
X = df.iloc[:, 0:]  # Assuming the input features start from the second column
y = df.iloc[:, 0]   # Assuming the output is in the first column

# Step 1: Handle missing values (if any)
X = X.fillna(X.mean())

# Step 2: Encode categorical variables using OneHotEncoder
categorical_features = X.select_dtypes(include=['object']).columns
numeric_features = X.select_dtypes(include=['float64', 'int64']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Step 3: Apply Polynomial Regression
poly = PolynomialFeatures(degree=2)

# Create a pipeline with preprocessing and Polynomial Regression
model = Pipeline([
    ('preprocessor', preprocessor),
    ('poly', poly),
    ('regressor', LinearRegression())
])

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model on the training set
model.fit(X_train, y_train)

# Make predictions on the testing set
predictions = model.predict(X_test)

# Calculate and display the accuracy of the model (using Mean Squared Error as an example)
mse = mean_squared_error(y_test, predictions)
accuracy = 1 - mse / y_test.var()
print("Accuracy of the Polynomial Regression model: {:.2f}%".format(sys.getsizeof(accuracy)), "\n")

print("Mean squared error of the Polynomial Regression model: {:.2f}%".format(mse), "\n")

mae = mean_absolute_error(y_test, predictions)
print("Mean absolute error of the Polynomial Regression model: {:.2f}%".format(mae), "\n")

# Convert predictions to binary values based on the threshold
threshold = 3
binary_predictions = (predictions > threshold).astype(int)

# Ensure that there are positive predictions to calculate precision and recall
if 1 in binary_predictions:
    # Calculate and display precision and recall
    precision = precision_score(y_test, binary_predictions, average='micro')  # You can use 'macro' or 'weighted' as well
    recall = recall_score(y_test, binary_predictions, average='micro')

    print("Precision of the Polynomial Regression model: {:.2f}%".format(precision * 100), "\n")
    print("Recall of the Polynomial Regression model: {:.2f}%".format(recall* 100))
else:
    print("No positive predictions. Adjust the threshold or check the data.")

Accuracy of the Polynomial Regression model: 32.00% 

Mean squared error of the Polynomial Regression model: 3.89% 

Mean absolute error of the Polynomial Regression model: 1.66% 

Precision of the Polynomial Regression model: 42.86% 

Recall of the Polynomial Regression model: 42.86%


  X = X.fillna(X.mean())


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, recall_score, precision_score

# Load the dataset
file_path = "Khodshifte.xlsx"
df = pd.read_excel(file_path)

# Extract the features (from the third column to the last)
features = df.iloc[:, 1:]

# Step 1: Handle missing values (if any)
features = features.fillna(features.mean())

# Step 2: Encode categorical variables (if any)
label_encoder = LabelEncoder()
for column in features.select_dtypes(include=['object']).columns:
    features[column] = label_encoder.fit_transform(features[column])

# Step 3: Normalize numerical features
scaler = StandardScaler()
normalized_features = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

# Extract the output column (assuming it is the first column)
output_column = df.iloc[:, 0]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(normalized_features, output_column, test_size=0.2, random_state=42)

# Create a Decision Tree model
decision_tree_model = DecisionTreeClassifier(random_state=42)

# Train the model on the training set
decision_tree_model.fit(X_train, y_train)

# Make predictions on the testing set
predictions = decision_tree_model.predict(X_test)

# Calculate and display the accuracy of the model
accuracy = accuracy_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)

print("Accuracy of the Decision Tree model: {:.2f}%".format(accuracy * 100), "\n")

print("Mean squared error of the Decision Tree model: {:.2f}%".format(mse), "\n")

mae = mean_absolute_error(y_test, predictions)
print("Mean absolute error of the Decision Tree model: {:.2f}%".format(mae), "\n")

recall = recall_score(y_test, predictions, average='weighted') * 100
print("Recall of the Decision Tree model: {:.2f}%".format(recall), "\n")

precision = precision_score(y_test, predictions, average='weighted') * 100
print("Precision of the Decision Tree model: {:.2f}%".format(precision), "\n")

Accuracy of the Decision Tree model: 28.57% 

Mean squared error of the Decision Tree model: 42.86% 

Mean absolute error of the Decision Tree model: 5.00% 

Recall of the Decision Tree model: 28.57% 

Precision of the Decision Tree model: 17.14% 



  features = features.fillna(features.mean())
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
