In [1]:
import pandas as pd
from pycaret.classification import *

# Convert document content to DataFrame
def get_dataframe():
    data = '''Time of accident,Type of accident,Vehicular involvement,Death,Injury,Cause of accident,Location of Accidents
9:00 AM,Head-on collision,CNG- covered van,1,1,Recklessly Driving,"Santhia, Pabna"
6:30 AM,Head-on collision,Votvoti-pickup van,2,0,Recklessly Driving,"Vitapara,Santhia Pabna"
# ... rest of your data ...'''
    
    df = pd.read_csv(StringIO(data))
    return df

# Prepare features
def prepare_features(df):
    # Extract hour from time
    df['Hour'] = pd.to_datetime(df['Time of accident'], format='%I:%M %p', errors='coerce').dt.hour
    
    # Create time categories
    df['Time_Category'] = pd.cut(df['Hour'], 
                                bins=[0, 6, 12, 18, 24], 
                                labels=['Night', 'Morning', 'Afternoon', 'Evening'])
    
    # Calculate severity
    df['Severity'] = df['Death'] * 2 + df['Injury']
    
    return df

# Train models
def train_classifier():
    # Get and prepare data
    df = get_dataframe()
    df = prepare_features(df)
    
    # Setup PyCaret
    clf = setup(data=df,
                target='Cause of accident',
                numeric_features=['Hour', 'Death', 'Injury', 'Severity'],
                categorical_features=['Type of accident', 'Vehicular involvement', 'Time_Category'],
                silent=True,
                session_id=123)
    
    # Compare and select best model
    best_model = compare_models(n_select=1)
    
    # Tune the model
    tuned_model = tune_model(best_model)
    
    # Generate predictions
    predictions = predict_model(tuned_model)
    
    # Plot feature importance
    plot_model(tuned_model, plot='feature')
    
    return tuned_model, predictions

# Main execution
if __name__ == "__main__":
    model, predictions = train_classifier()
    
    # Print performance metrics
    print("\nModel Performance:")
    print(pull())

NameError: name 'StringIO' is not defined

In [2]:
import pandas as pd
from io import StringIO

# Your data as a string
data = '''Time of accident,Type of accident,Vehicular involvement,Death,Injury,Cause of accident,Location of Accidents
9:00 AM,Head-on collision,CNG- covered van,1,1,Recklessly Driving,"Santhia, Pabna"
6:30 AM,Head-on collision,Votvoti-pickup van,2,0,Recklessly Driving,"Vitapara,Santhia Pabna"
# ... rest of your data ...'''

def get_dataframe():
    # Read the data from the string using StringIO
    df = pd.read_csv(StringIO(data))
    return df

def prepare_features(df):
    # Your feature preparation logic here
    return df

def train_classifier():
    # Get and prepare data
    df = get_dataframe()
    df = prepare_features(df)
    
    # Setup PyCaret or your classifier model
    # model, predictions = <PyCaret or classifier setup here>
    return df  # or return model and predictions

# Main execution
if __name__ == "__main__":
    df = train_classifier()
    print(df)  # Or whatever you want to do with the DataFrame


              Time of accident   Type of accident Vehicular involvement  \
0                      9:00 AM  Head-on collision      CNG- covered van   
1                      6:30 AM  Head-on collision    Votvoti-pickup van   
2  # ... rest of your data ...                NaN                   NaN   

   Death  Injury   Cause of accident   Location of Accidents  
0    1.0     1.0  Recklessly Driving          Santhia, Pabna  
1    2.0     0.0  Recklessly Driving  Vitapara,Santhia Pabna  
2    NaN     NaN                 NaN                     NaN  
