<a href="https://colab.research.google.com/github/mobarak91/Edge_IT20002/blob/main/SVM_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

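Building a Machine Learning Model (SVM/CNN) using scikit-learn and exporting it using pickle. Note: despite the SVM/CNN wording, the cells below train a `RandomForestClassifier` and a `LogisticRegression` model.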

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import pickle
from sklearn.preprocessing import StandardScaler

def prepare_features_and_target(dataset):
    """Split a raw DataFrame into an encoded feature matrix and a target series.

    The last column is taken as the target *before* one-hot encoding. Encoding
    the whole frame first would expand a string target into several indicator
    columns, and all but the last of them would end up among the features,
    leaking the label into the model.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Raw data whose last column is the target variable.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Series)
        One-hot encoded, float-valued features and the untouched target column.
    """
    target = dataset.iloc[:, -1]
    features = dataset.iloc[:, :-1]

    if dataset.isnull().sum().sum() > 0:
        print("Warning: Missing values detected! Filling with mean values.")
        # A row without a label cannot be used for supervised training.
        labelled = target.notna()
        features = features.loc[labelled]
        target = target.loc[labelled]
        # numeric_only=True: taking the mean of string columns (e.g. GENDER)
        # raises TypeError on pandas >= 2.0. Missing categorical values are
        # left as-is and become all-zero indicator rows in get_dummies below.
        features = features.fillna(features.mean(numeric_only=True))

    # dtype=float keeps the indicator columns numeric for the scaler.
    features = pd.get_dummies(features, dtype=float)
    return features, target


try:
    # Load the dataset
    dataset = pd.read_csv('lung_cancer.csv')
    print(dataset.head())

    # Separate the target first, then impute and encode the features only
    X, y = prepare_features_and_target(dataset)

    # Split the dataset into training and test sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale the inputs; the scaler is fitted on the training split only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Initialize the RandomForestClassifier
    clf = RandomForestClassifier(n_estimators=100, random_state=42)

    # Train the classifier
    clf.fit(X_train, y_train)

    # Predict on the test set
    y_pred = clf.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Save model and scaler; the context managers flush and close the files
    with open("model.sav", "wb") as model_file:
        pickle.dump(clf, model_file)
    with open("scalermodel.sav", "wb") as scaler_file:
        pickle.dump(scaler, scaler_file)

except FileNotFoundError:
    print("Error: The specified CSV file was not found. Please check the file path.")
except pd.errors.EmptyDataError:
    print("Error: The CSV file is empty. Please provide a valid dataset.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


  GENDER  AGE  SMOKING  YELLOW_FINGERS  ANXIETY  PEER_PRESSURE  \
0      M   69        1               2        2              1   
1      M   74        2               1        1              1   
2      F   59        1               1        1              2   
3      M   63        2               2        2              1   
4      F   63        1               2        1              1   

   CHRONIC DISEASE  FATIGUE   ALLERGY   WHEEZING  ALCOHOL CONSUMING  COUGHING  \
0                1         2         1         2                  2         2   
1                2         2         2         1                  1         1   
2                1         2         1         2                  1         2   
3                1         1         1         1                  2         1   
4                1         1         1         2                  1         2   

   SHORTNESS OF BREATH  SWALLOWING DIFFICULTY  CHEST PAIN LUNG_CANCER  
0                    2                      

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
import pickle

def train_model_from_csv(file_path, target_column):
    """Train, evaluate, and save a logistic-regression classifier from a CSV file.

    The CSV at ``file_path`` is loaded, the target column is separated from the
    features, the features are imputed and one-hot encoded, and an 80/20
    train/test split is used to fit and score a ``LogisticRegression`` model on
    standardized inputs. Accuracy and a classification report are printed.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to load.
    target_column : str
        Name of the column to predict, exactly as it appears in the raw CSV
        (i.e. before any one-hot encoding).

    Returns
    -------
    None
        The fitted model is written to ``logistic_model.sav`` and the fitted
        scaler to ``scaler.sav`` in the current working directory.

    Notes
    -----
    Errors are reported with ``print`` rather than raised: a missing file, an
    empty CSV, an unknown target column, and any other exception each produce
    a message and the function returns normally.
    """
    try:
        # Load the dataset from the path supplied by the caller
        dataset = pd.read_csv(file_path)

        # Validate the target against the raw columns. Checking after one-hot
        # encoding would hide a string target behind generated column names.
        if target_column not in dataset.columns:
            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

        # Check for missing values and handle them
        if dataset.isnull().sum().sum() > 0:
            print("Warning: Missing values detected! Filling with mean values.")
            # Rows without a label cannot be used for supervised training.
            dataset = dataset.dropna(subset=[target_column])
            # numeric_only=True: the mean of string columns raises TypeError
            # on pandas >= 2.0.
            dataset = dataset.fillna(dataset.mean(numeric_only=True))

        # Separate the target first, then encode only the features so that no
        # indicator column derived from the target leaks into X.
        y = dataset[target_column]
        X = pd.get_dummies(dataset.drop(columns=[target_column]), dtype=float)

        # Split the dataset into training and test sets (80% train, 20% test)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Scale the features; the scaler is fitted on the training split only
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Initialize the Logistic Regression model
        model = LogisticRegression(random_state=42, max_iter=1000)

        # Train the model
        model.fit(X_train, y_train)

        # Predict on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {accuracy:.4f}")
        print("Classification Report:")
        print(classification_report(y_test, y_pred))

        # Save the model and scaler; the context managers close the files
        with open("logistic_model.sav", "wb") as model_file:
            pickle.dump(model, model_file)
        with open("scaler.sav", "wb") as scaler_file:
            pickle.dump(scaler, scaler_file)
        print("Model and scaler saved successfully!")

    except FileNotFoundError:
        print("Error: The specified CSV file was not found. Please check the file path.")
    except pd.errors.EmptyDataError:
        print("Error: The CSV file is empty. Please provide a valid dataset.")
    except ValueError as ve:
        print(f"Value Error: {ve}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Example usage:
csv_file_path = "lung_cancer.csv"   # Replace with your CSV file path
target_column_name = "LUNG_CANCER"  # Raw name of the target column in the CSV
train_model_from_csv(csv_file_path, target_column_name)


Value Error: Target column 'target' not found in the dataset.
