In [1]:
import os

import firebase_admin
import numpy as np
import pandas as pd
from firebase_admin import credentials, db
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Initialize Firebase: load the service-account key.
# The key location can be overridden through the FIREBASE_CREDENTIALS
# environment variable so the notebook is not tied to one machine's file
# layout; the previously hard-coded path remains the default.
cred = credentials.Certificate(
    os.environ.get("FIREBASE_CREDENTIALS", "/content/Credentials.json")
)

In [3]:
# Connect to the Realtime Database.
# initialize_app() raises ValueError when the default app already exists, so
# re-running this cell in a live kernel would fail. Reuse the existing app if
# there is one, and only create it on the first run.
try:
    app = firebase_admin.get_app()
except ValueError:
    app = firebase_admin.initialize_app(cred, {
        'databaseURL': 'https://team-parth-fe53f-default-rtdb.firebaseio.com/'
    })
# Keep the App object as the cell's displayed result
app

<firebase_admin.App at 0x7ec33d0ee680>

In [23]:
# Handle on the database root, plus a printed snapshot of everything below it
ref = db.reference(path="/")
data = ref.get()
print(data)

{'Arts and Humanities': {'options1': 'a', 'options2': 'a', 'options3': 'a', 'options4': 'a', 'options5': 'a', 'options6': 'a'}, 'Engineering Path': {'options': 'a', 'options1': 'b', 'options2': 'a', 'options3': 'c', 'options4': 'a', 'options5': 'a'}, 'Medical Path': {'options1': 'a', 'options2': 'a', 'options3': 'a', 'options4': 'a', 'options5': 'a', 'options6': 'a'}, 'y_pred': {'result_path': 'Engineering', 'value': [3]}}


In [5]:
# Read the training dataset from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="career_path_datasetfinal.csv")

In [6]:
# The target is the preferred career path; the features are every remaining
# column except the student identifier
y = df['Preferred_Career_Path']
X = df.drop(columns=['Student_ID', 'Preferred_Career_Path'])

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:

# Imputer that fills missing feature values using the 'mean' strategy
imputer = SimpleImputer(
    strategy='mean',
)


In [9]:
# Fit the imputer on the training split only, then fill that split's gaps
X_train_imputed = imputer.fit(X_train).transform(X_train)


In [10]:
# Fill gaps in the test split with the imputer already fitted on the training split
X_test_imputed = imputer.transform(X=X_test)


In [11]:
# Decision tree classifier; the fixed random_state makes repeated runs build the same tree
model = DecisionTreeClassifier(
    random_state=42,
)




In [12]:

# Fit the tree on the imputed training features and their labels
model.fit(X=X_train_imputed, y=y_train)


In [13]:
# Class labels predicted for the imputed held-out rows
y_pred = model.predict(X=X_test_imputed)


In [14]:
# Score the held-out predictions: a per-class text report and the overall accuracy
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)


In [15]:
# Report the test-set metrics, one item per line
print(
    f"Accuracy: {accuracy}",
    "Classification Report:",
    classification_rep,
    sep="\n",
)


Accuracy: 0.915
Classification Report:
              precision    recall  f1-score   support

           1       0.90      0.93      0.91       188
           2       0.93      0.89      0.91       182
           3       0.92      0.93      0.93       299
           4       0.91      0.89      0.90       131

    accuracy                           0.92       800
   macro avg       0.91      0.91      0.91       800
weighted avg       0.92      0.92      0.91       800



In [29]:
# Re-read everything under the database root and show the current snapshot
firebase_data = ref.get()
print(firebase_data)
# Fetch one individual answer: 'options1' under 'Arts and Humanities'
new_data = ref.child('Arts and Humanities').child('options1').get()

{'Arts and Humanities': {'options1': 'a', 'options2': 'a', 'options3': 'a', 'options4': 'a', 'options5': 'a', 'options6': 'a'}, 'Engineering Path': {'options': 'a', 'options1': 'b', 'options2': 'a', 'options3': 'c', 'options4': 'a', 'options5': 'a'}, 'Medical Path': {'options1': 'a', 'options2': 'a', 'options3': 'a', 'options4': 'a', 'options5': 'a', 'options6': 'a'}, 'y_pred': {'result_path': 'Engineering', 'value': [3]}}


In [30]:
# Show the single answer previously fetched into new_data
print(new_data)

a


In [26]:
# Tabulate the Firebase snapshot held in `data` (assumed to be a dictionary):
# with orient='index' each top-level key becomes a row label
new_df = pd.DataFrame.from_dict(data=data, orient='index')

In [88]:
# Firebase locations of the individual questionnaire answers (six per section).
# The encoded feature vector is built in exactly this order.
paths = [
    'Arts and Humanities/options1', 'Arts and Humanities/options2', 'Arts and Humanities/options3',
    'Arts and Humanities/options4', 'Arts and Humanities/options5', 'Arts and Humanities/options6',
    'Engineering Path/options', 'Engineering Path/options1', 'Engineering Path/options2',
    'Engineering Path/options3', 'Engineering Path/options4', 'Engineering Path/options5',
    'Medical Path/options1', 'Medical Path/options2', 'Medical Path/options3', 'Medical Path/options4',
    'Medical Path/options5', 'Medical Path/options6',
    'Commerce and Business/options1', 'Commerce and Business/options2', 'Commerce and Business/options3',
    'Commerce and Business/options4', 'Commerce and Business/options5', 'Commerce and Business/options6'
]

# Numeric code assigned to each answer option letter
mapping_dict = {'a': 5, 'b': 6, 'c': 7, 'd': 8}

# Retrieve values from Firebase and encode them.
# A missing or unrecognised answer is reported immediately. Silently skipping
# it would shorten the vector and shift every later feature by one position,
# which only surfaces afterwards as an opaque error (or a wrong prediction).
encoded_values = []
for path in paths:
    # Named `answer` rather than `data` so the database snapshot stored in
    # `data` by an earlier cell is not overwritten by this loop.
    answer = ref.child(path).get()
    if not answer:
        raise ValueError(f"No answer found in Firebase at '{path}'")
    unknown_options = [option for option in answer if option not in mapping_dict]
    if unknown_options:
        raise ValueError(
            f"Unrecognised option(s) {unknown_options} at '{path}'; "
            f"expected one of {sorted(mapping_dict)}"
        )
    encoded_values.extend(mapping_dict[option] for option in answer)

# Exactly one encoded value per question is expected (assumes each stored
# answer is a single option letter, as in the snapshots printed earlier).
if len(encoded_values) != len(paths):
    raise ValueError(
        f"Expected {len(paths)} encoded answers but got {len(encoded_values)}"
    )

# Shape the values as a single sample: one row, one column per answer
array_1d = np.array(encoded_values).reshape(1, -1)

# Print the encoded array
print("Encoded 1D Array:", array_1d)

Encoded 1D Array: [[7 6 6 6 5 8 5 5 5 5 5 5 6 7 8 8 7 5 8 8 8 8 8 8]]


In [89]:
# Predict the class label for the encoded Firebase answers.
# NOTE: this rebinds `y_pred`, which an earlier cell used for the test-set predictions.
y_pred = model.predict(X=array_1d)


In [90]:
# Lookup table from the numeric class label (1-4) to the career path name
career_paths = dict(
    enumerate(
        ("Medical", "Arts and Humanities", "Engineering", "Commerce"),
        start=1,
    )
)

In [91]:
# Display the predicted career path for each data point.
# The label comes from the shared `career_paths` lookup instead of a second,
# hand-written if/elif copy of the same mapping, so the printed name cannot
# drift from the one stored to Firebase.
for prediction in y_pred:
    path_name = career_paths.get(prediction)
    if path_name is None:
        # Label outside the known classes
        print("Invalid result. Please enter a valid result (1, 2, 3, or 4).")
    else:
        print(f"Predicted Career Path: {path_name}")

Predicted Career Path: Medical


In [92]:
# Repeat the test-set metrics next to the prediction, one item per line
print(
    f"Accuracy: {accuracy}",
    "Classification Report:",
    classification_rep,
    sep="\n",
)

Accuracy: 0.915
Classification Report:
              precision    recall  f1-score   support

           1       0.90      0.93      0.91       188
           2       0.93      0.89      0.91       182
           3       0.92      0.93      0.93       299
           4       0.91      0.89      0.90       131

    accuracy                           0.92       800
   macro avg       0.91      0.91      0.91       800
weighted avg       0.92      0.92      0.91       800



In [93]:
# Publish the result to Firebase at '/y_pred': the career path name for the
# first prediction together with the model's test-set accuracy
output_ref = db.reference('/y_pred')
output_data = dict(y_pred=career_paths[y_pred[0]], metric=accuracy)
print(output_data)
output_ref.set(output_data)

{'y_pred': 'Medical', 'metric': 0.915}


In [94]:
# Show the raw numeric class label of every prediction
for label in y_pred:
    print(f"Predicted Career Path (Numeric): {label}")


Predicted Career Path (Numeric): 1


In [95]:
# Count how many predictions fall into each class and print the tally
print(
    "Predicted Class Distribution:",
    pd.Series(y_pred).value_counts(),
    sep="\n",
)


Predicted Class Distribution:
1    1
dtype: int64
