In [None]:
import argparse
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib  # To save and load models
import numpy as np

def main(model_name, dataset_file, new_data=None):
    """Load a previously saved classifier and report its anomaly predictions.

    The dataset is read from ``dataset_file``, reduced to the metric columns,
    and used to fit a ``StandardScaler``. The model is then loaded from
    ``"<model_name>_model.pkl"`` in the current working directory.

    * When ``new_data`` is given, a single prediction for that sample is
      printed.
    * Otherwise the model predicts on the whole (scaled) dataset and the
      percentage of anomalies is printed.

    Args:
        model_name: One of ``logistic_regression``, ``random_forest``, ``svm``.
        dataset_file: Path to a CSV file containing a ``timestamp`` column,
            the six metric columns and an ``anomaly`` column.
        new_data: Optional single sample, either as a comma-separated string
            (e.g. ``"1.0,2.0,3.0,4.0,5.0,6.0"``) or as a sequence of numbers.
            ``None`` or an empty value selects whole-dataset mode.

    Raises:
        ValueError: If ``model_name`` is not supported, if a value in
            ``new_data`` cannot be converted to float, or if the number of
            values does not match the number of feature columns.

    NOTE(review): the scaler is re-fitted on ``dataset_file`` on every call;
    this presumably matches the scaling used when the model was trained only
    if the same dataset is supplied -- confirm against the training script.
    """
    # Validate the model name first so a typo fails before any file I/O.
    if model_name not in ['logistic_regression', 'random_forest', 'svm']:
        raise ValueError("Model name should be one of: logistic_regression, random_forest, svm")

    df = pd.read_csv(dataset_file)

    df.index = df['timestamp']
    metric_columns = ['bandwidth', 'throughput', 'congestion', 'packet_loss', 'latency', 'jitter', 'anomaly']
    df = df[metric_columns]

    # Features are every metric column except the target label.
    X = df.drop('anomaly', axis=1)

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code -- only load model files from a trusted source.
    model = joblib.load(f"{model_name}_model.pkl")

    has_new_data = new_data is not None and len(new_data) > 0
    if has_new_data:
        # Accept both the legacy comma-separated string and a plain sequence.
        if isinstance(new_data, str):
            raw_values = new_data.split(',')
        else:
            raw_values = list(new_data)
        new_data_array = np.array(raw_values, dtype=float).reshape(1, -1)

        if new_data_array.shape[1] != X.shape[1]:
            raise ValueError(
                f"new_data must contain {X.shape[1]} values "
                f"({', '.join(X.columns)}), got {new_data_array.shape[1]}"
            )

        # Re-attach the column names the scaler was fitted with so that
        # scikit-learn does not warn about missing feature names.
        new_sample = pd.DataFrame(new_data_array, columns=X.columns)
        X_new_scaled = scaler.transform(new_sample)
        prediction = model.predict(X_new_scaled)
        print(f"Prediction for the new data using {model_name.capitalize()} model:", prediction[0])
    else:
        predictions = model.predict(X_scaled)
        # Sum / count is a percentage only if predictions are 0/1 labels.
        anomaly_percentage = (np.sum(predictions) / len(predictions)) * 100
        print(f"Percentage of anomalies in the predictions of {model_name.capitalize()}: {anomaly_percentage}")

if __name__ == "__main__":
    # Command-line entry point: parse the arguments and delegate to main().
    parser = argparse.ArgumentParser(description='Load a saved classification model and predict anomalies.')
    parser.add_argument('model_name', type=str, help='Name of the model to use (logistic_regression, random_forest, svm)')
    parser.add_argument('dataset_file', type=str, help='Path to the dataset file')
    # nargs='+' collects one or more space-separated floats into a list.
    parser.add_argument('--new_data', nargs='+', type=float, help='New data for prediction (space separated values)')
    args = parser.parse_args()

    # main() takes the sample as a comma-separated string, so re-join the
    # parsed floats; leave it as None when the option was not supplied.
    new_data = None
    if args.new_data:
        new_data = ','.join(map(str, args.new_data))

    main(args.model_name, args.dataset_file, new_data)


In [None]:
!python classification.py logistic_regression dataset.csv --new_data 1.0 2.0 3.0 4.0 5.0 6.0