In [6]:
import pandas as pd

# Load the dataset
file_path = "C:/Users/rohit/Downloads/project_files/dataset/migraine_data.csv"
df = pd.read_csv(file_path)

# Display basic info
print(df.info())
print(df.head())

# Check for missing values
print(df.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 24 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Age          400 non-null    int64 
 1   Duration     400 non-null    int64 
 2   Frequency    400 non-null    int64 
 3   Location     400 non-null    int64 
 4   Character    400 non-null    int64 
 5   Intensity    400 non-null    int64 
 6   Nausea       400 non-null    int64 
 7   Vomit        400 non-null    int64 
 8   Phonophobia  400 non-null    int64 
 9   Photophobia  400 non-null    int64 
 10  Visual       400 non-null    int64 
 11  Sensory      400 non-null    int64 
 12  Dysphasia    400 non-null    int64 
 13  Dysarthria   400 non-null    int64 
 14  Vertigo      400 non-null    int64 
 15  Tinnitus     400 non-null    int64 
 16  Hypoacusis   400 non-null    int64 
 17  Diplopia     400 non-null    int64 
 18  Defect       400 non-null    int64 
 19  Ataxia       400 non-null    

In [9]:
print(df.columns)

Index(['Age', 'Duration', 'Frequency', 'Location', 'Character', 'Intensity',
       'Nausea', 'Vomit', 'Phonophobia', 'Photophobia', 'Visual', 'Sensory',
       'Dysphasia', 'Dysarthria', 'Vertigo', 'Tinnitus', 'Hypoacusis',
       'Diplopia', 'Defect', 'Ataxia', 'Conscience', 'Paresthesia', 'DPF',
       'Type'],
      dtype='object')


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Set the target column
target_col = 'Type'  # If 'Type' is the target column

# Handle missing values (drop or fill)
df = df.dropna()  # Drop rows with missing values

# Encode categorical columns
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    if col != target_col:  # Don't encode the target column here
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le  # Save encoder for future use

# Encode the target column
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(df[target_col])  # Encode target labels

# Separate features (X) and target (y)
X = df.drop(columns=[target_col])  

# Normalize numerical data (if needed)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Data Preprocessing Complete!")


Data Preprocessing Complete!


In [15]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train Support Vector Machine (SVM)
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

# Train Logistic Regression
log_reg = LogisticRegression(random_state=42)
log_reg.fit(X_train, y_train)
log_pred = log_reg.predict(X_test)

# Train Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Evaluate Models
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
print("Logistic Regression Accuracy:", accuracy_score(y_test, log_pred))
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))

# Display Classification Reports
print("\nSVM Classification Report:\n", classification_report(y_test, svm_pred))
print("\nLogistic Regression Classification Report:\n", classification_report(y_test, log_pred))
print("\nRandom Forest Classification Report:\n", classification_report(y_test, rf_pred))


SVM Accuracy: 0.9375
Logistic Regression Accuracy: 0.95
Random Forest Accuracy: 0.925

SVM Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.83      0.91         6
           1       0.75      1.00      0.86         3
           2       1.00      1.00      1.00        13
           3       1.00      1.00      1.00         4
           4       0.00      0.00      0.00         2
           5       0.96      0.96      0.96        49
           6       1.00      1.00      1.00         3

    accuracy                           0.94        80
   macro avg       0.82      0.83      0.82        80
weighted avg       0.94      0.94      0.94        80


Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.83      0.91         6
           1       0.75      1.00      0.86         3
           2       1.00      1.00      1.00        13
           3       1.00   

In [21]:
import pickle

In [25]:
filename = 'migraine_model.sav'
pickle.dump(log_reg, open(filename, 'wb'))  # Replace log_reg with your trained model
print("Model saved successfully!")

Model saved successfully!


In [31]:
import numpy as np
# Load the saved model
filename = 'migraine_model.sav'
with open(filename, 'rb') as file:
    loaded_model = pickle.load(file)

print("Model loaded successfully!")

Model loaded successfully!


In [33]:
# Sample input (replace with real values from your dataset)
sample_input = np.array([[25, 4, 3, 1, 2, 8, 1, 0, 1, 1, 0, 2, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 2]])

# Make prediction
prediction = loaded_model.predict(sample_input)

# Print the predicted migraine type
print(f"Predicted Migraine Type: {prediction[0]}")


Predicted Migraine Type: 2
