In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install mord
import pandas as pd
from mord import OrdinalRidge
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

file_path = '/content/drive/MyDrive/Toddler_Autism_dataset_July_2018.csv'
data = pd.read_csv(file_path)
print(data.columns)



Index(['Case_No', 'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10',
       'Age_Mons', 'Qchat-10-Score', 'Sex', 'Ethnicity', 'Jaundice',
       'Family_mem_with_ASD', 'Who completed the test', 'Class/ASD Traits '],
      dtype='object')


In [None]:
# Separate features (questionnaire responses) and the target label
X = data.drop('Class/ASD Traits ', axis=1)
y = data['Class/ASD Traits ']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Preprocessing pipeline
preprocessor = ColumnTransformer([
    ('categorical', OneHotEncoder(), ['Sex', 'Ethnicity', 'Jaundice', 'Family_mem_with_ASD'])
])


# Label encoding for the target label 'Class/ASD Traits'
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Create and train the Ordinal Regression model
ordinal_model = OrdinalRidge()
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', ordinal_model)
])
pipeline.fit(X_train, y_train_encoded)

# Evaluate the model on the test set
y_pred_encoded = pipeline.predict(X_test)
accuracy = accuracy_score(y_test_encoded, y_pred_encoded)
print("Accuracy on Test Set:", accuracy)

print("Original Class Labels:", label_encoder.classes_)

print("Predictions for Test Data:")
for i in range(len(X_test)):
    test_instance = X_test.iloc[[i]]  # Get a single row as DataFrame
    predicted_label = pipeline.predict(test_instance)
    true_label = y_test.iloc[i]
    print(f"Test Instance {i+1}: ASD Traits: {true_label}")



Accuracy on Test Set: 0.7264150943396226
Original Class Labels: ['No' 'Yes']
Predictions for Test Data:
Test Instance 1: ASD Traits: Yes
Test Instance 2: ASD Traits: No
Test Instance 3: ASD Traits: No
Test Instance 4: ASD Traits: Yes
Test Instance 5: ASD Traits: Yes
Test Instance 6: ASD Traits: No
Test Instance 7: ASD Traits: Yes
Test Instance 8: ASD Traits: No
Test Instance 9: ASD Traits: Yes
Test Instance 10: ASD Traits: No
Test Instance 11: ASD Traits: Yes
Test Instance 12: ASD Traits: No
Test Instance 13: ASD Traits: No
Test Instance 14: ASD Traits: No
Test Instance 15: ASD Traits: Yes
Test Instance 16: ASD Traits: Yes
Test Instance 17: ASD Traits: Yes
Test Instance 18: ASD Traits: Yes
Test Instance 19: ASD Traits: Yes
Test Instance 20: ASD Traits: Yes
Test Instance 21: ASD Traits: Yes
Test Instance 22: ASD Traits: No
Test Instance 23: ASD Traits: Yes
Test Instance 24: ASD Traits: Yes
Test Instance 25: ASD Traits: No
Test Instance 26: ASD Traits: Yes
Test Instance 27: ASD Traits: Y