In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the survey responses; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv("Mental_health_prediction.csv")
# Bare last expression so the notebook renders the frame as a table.
df

In [None]:
# Some statistics and information about the dataset
print(df.describe())
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
df.info()

In [None]:
# Count the missing (NaN) values in every column
df.isna().sum()

In [None]:
# Fill missing ages with the column median.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the df['Age'] selection: that form is chained
# in-place assignment, which pandas 2.x warns about and which leaves df
# untouched once Copy-on-Write is active.
df['Age'] = df['Age'].fillna(df['Age'].median())

In [None]:
# Frequency of every distinct Age value (lets implausible entries stand out)
df['Age'].value_counts()

In [None]:
# A few Age values are implausible (negative numbers, 999999, ...).
# Everything outside [MIN_VALID_AGE, MAX_VALID_AGE] is replaced with the median.
MIN_VALID_AGE = 18
MAX_VALID_AGE = 120

# between() is inclusive on both ends, so 18 and 120 themselves are kept.
age_is_valid = df['Age'].between(MIN_VALID_AGE, MAX_VALID_AGE)
# Take the median over the plausible ages only, so the outliers that are
# about to be removed cannot influence their own replacement value.
valid_age_median = df.loc[age_is_valid, 'Age'].median()
# where() keeps the entries whose condition is True and substitutes the rest;
# assigning a new Series avoids writing through an alias of the column.
df['Age'] = df['Age'].where(age_is_valid, valid_age_median)

#Ranges of Age
# NOTE(review): ages in (100, 120] pass the validity filter above but lie
# outside the last bin, so they get NaN here - confirm the intended upper edge.
df['age_range'] = pd.cut(
    df['Age'],
    [0, 20, 30, 65, 100],
    labels=["0-20", "21-30", "31-65", "66-100"],
    include_lowest=True,
)

In [None]:
# Re-check the Age frequencies after the out-of-range values were replaced
df['Age'].value_counts()

In [None]:
#Dropping unimportant columns
# One drop() call removes all three columns atomically: if any of them is
# missing the call raises before the frame has been modified at all, whereas
# consecutive in-place drops could leave it half-updated.
df = df.drop(columns=['comments', 'state', 'Timestamp'])

df.head(5)

In [None]:
# Inspect the distinct Gender answers and how often each one occurs
df['Gender'].value_counts()

In [None]:
#correcting Gender Column
# Canonical label -> every raw spelling that should be folded into it.
gender_dict = {
    'Male': ['male', 'M', 'm', 'Make', 'cis Male', 'Man', 'Male', 'msle', 'Cis Male', 'Mail', 'Malr',
             'Cis Man', 'Guy (-ish) ^_^', 'Male-ish', 'maile', 'Mal', 'Male (CIS)', "something kinda male?"],
    'trans': ["Trans-female", "trans_woman", "Female (trans)", 'Trans woman', "Androgyne",
              "male leaning androgynous"],
    'gender_queer': ["queer/she/they", "Agender", "non-binary", "Neuter", "queer", 'Genderqueer',
                     "ostensibly male, unsure what that really means", "Enby", "fluid", "gender_queer"],
    'Female': ["Cis Female", "f", 'F', "Female", "woman", "Femake", "female", "cis-female/femme",
               "Female (cis)", "femail"],
    'Don_not_know': ["Nah", "All", 'p', 'A little about you'],
}

# Invert the table so each raw spelling points directly at its canonical label.
gender_lookup = {
    alias: canonical
    for canonical, aliases in gender_dict.items()
    for alias in aliases
}

# Series.iteritems() no longer exists in pandas 2.0+, and a Python-level loop
# over every row is unnecessary: map() performs the lookup for the whole
# column at once. It yields NaN for answers that are not in the table, so
# fillna() puts the original answer back for exactly those rows.
df['Gender'] = df['Gender'].map(gender_lookup).fillna(df['Gender'])

In [None]:
# Check which Gender categories remain after the normalisation step
df['Gender'].value_counts()

In [None]:
# Placeholder written into a column when a value is missing, per data type
defaultInt = 0
defaultString = 'NaN'
defaultFloat = 0.0

# Column names grouped by data type
intFeatures = ['Age']
stringFeatures = ['Gender', 'Country', 'self_employed', 'family_history', 'treatment', 'work_interfere',
                 'no_employees', 'remote_work', 'tech_company', 'anonymity', 'leave', 'mental_health_consequence',
                 'phys_health_consequence', 'coworkers', 'supervisor', 'mental_health_interview', 'phys_health_interview',
                 'mental_vs_physical', 'obs_consequence', 'benefits', 'care_options', 'wellness_program',
                 'seek_help']
floatFeatures = []

# Build one column -> placeholder table. Groups are registered from lowest to
# highest precedence (float, string, int) so that a column listed in more than
# one group resolves the way an int / string / float check order would.
fill_value_by_feature = {}
for group, placeholder in ((floatFeatures, defaultFloat),
                           (stringFeatures, defaultString),
                           (intFeatures, defaultInt)):
    fill_value_by_feature.update(dict.fromkeys(group, placeholder))

# Fill the NaN's column by column; columns missing from every group are reported
for column in df.columns:
    if column not in fill_value_by_feature:
        print('Error: Feature %s not recognized.' % column)
        continue
    df[column] = df[column].fillna(fill_value_by_feature[column])
df.head()

In [None]:
# Placeholder string to be swapped for a real answer
default = 'NaN'
# Rows still carrying the placeholder in self_employed are counted as 'No'
df['self_employed'] = df['self_employed'].replace({default: 'No'})
print(df['self_employed'].unique())

In [None]:
# Rows carrying the placeholder in work_interfere become an explicit "Don't know"
df['work_interfere'] = df['work_interfere'].replace({default: "Don't know"})
print(df['work_interfere'].unique())

In [None]:
# Re-count the missing (NaN) values in every column after the fill steps
df.isna().sum()

In [None]:
#Encoding data
from sklearn import preprocessing
from sklearn.preprocessing import binarize, LabelEncoder, MinMaxScaler
# 'label_<column>' -> the column's original values, positioned by their integer code
labelDict = {}
for column in df.columns:
    encoder = LabelEncoder()
    # Learn the column's classes and overwrite it with the integer codes in one step
    df[column] = encoder.fit_transform(df[column])
    # classes_[i] is the original value that was encoded as i
    labelDict['label_' + column] = list(encoder.classes_)

for label_name, classes in labelDict.items():
    print(label_name, classes)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Pairwise correlation of all columns, drawn as a heatmap
corrmat = df.corr()
fig, ax = plt.subplots(figsize=(12, 9))
# Draw onto the axes created above; the colour scale is capped at 0.8
sns.heatmap(corrmat, vmax=0.8, square=True, ax=ax)
plt.show()

In [None]:
# Display the frame as it stands after cleaning and label encoding
df

In [None]:
!pip install 'qiskit[machine-learning]'

In [None]:
import qiskit

In [None]:
from matplotlib import pyplot as plt
import numpy as np
from qiskit import Aer, BasicAer
from qiskit.utils import QuantumInstance
from qiskit.providers.aer import QasmSimulator
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.algorithms import QSVC

In [None]:
# Number of leading survey rows used for the experiment, and the column to predict
N_SAMPLES = 50
TARGET = 'treatment'

# Keep the first 15 columns; .copy() makes df_new independent of df.
df_new = df.iloc[:, :15].copy()
labels = df[TARGET].head(N_SAMPLES)
# The model inputs must not contain the column being predicted, otherwise the
# classifier can read the answer straight from its features. errors='ignore'
# makes this a no-op should TARGET not be among the first 15 columns.
sample = df_new.drop(columns=[TARGET], errors='ignore').head(N_SAMPLES)
sample

In [None]:
# Write the reduced frame to disk; with default arguments to_csv also stores
# the row index as the first column of the file.
df_new.to_csv("sample_date.csv")

In [None]:
from sklearn.model_selection import train_test_split
from qiskit.utils import algorithm_globals

# One seed drives both Qiskit's global random state and the data split
seed = 123
algorithm_globals.random_seed = seed

# 80 % of the sampled rows are used for training, the remainder for testing
(train_features, test_features,
 train_labels, test_labels) = train_test_split(sample, labels, train_size=0.8, random_state=seed)

In [None]:
!pip install pylatexenc

In [None]:
from qiskit.circuit.library import ZZFeatureMap

# One feature-map dimension per column of the sample frame
num_features = len(sample.columns)

# Single-repetition ZZ feature map, decomposed once and rendered with matplotlib
feature_map = ZZFeatureMap(num_features, reps=1)
feature_map.decompose().draw("mpl", fold=20)

In [None]:
# One feature-map dimension per column of the sample frame
num_features = sample.shape[1]

# Same ZZFeatureMap construction as in the drawing cell above, without rendering.
# NOTE: feature_map is assigned again in the kernel cell further down.
feature_map = ZZFeatureMap(feature_dimension=num_features, reps=1)

In [None]:
from qiskit.algorithms.state_fidelities import ComputeUncompute
from qiskit.primitives import Sampler

# Sampler primitive that the fidelity computation runs its circuits on
sampler = Sampler()
# Compute-uncompute state fidelity backed by that sampler
fidelity = ComputeUncompute(sampler=sampler)

In [None]:
from qiskit_machine_learning.kernels import FidelityQuantumKernel

# Size the feature map from the data it will encode instead of hard-coding 2:
# the map needs one input dimension per feature column, and a fixed 2 does not
# match the width of the training frame. The default number of repetitions is
# left unchanged.
feature_map = ZZFeatureMap(feature_dimension=train_features.shape[1])
# Kernel whose entries are state fidelities between feature-map encodings
new_kernel = FidelityQuantumKernel(feature_map=feature_map, fidelity=fidelity)

## Training and pickling the model

In [None]:
import pickle

In [None]:
# Support vector classifier that uses the quantum kernel built above
qsvc = QSVC(quantum_kernel=new_kernel)
# Train on the training split; as the last expression of the cell, the return
# value of fit() is what the notebook displays.
qsvc.fit(train_features, train_labels)

In [None]:
# File that receives the serialized classifier
Pkl_Filename = "Pickle_QSVC_Model.pkl"

# pickle writes bytes, hence binary mode; the context manager closes the file
with open(Pkl_Filename, mode='wb') as pkl_out:
    pickle.dump(qsvc, pkl_out)

In [None]:
# Read the classifier back from disk.
# Only unpickle files you created yourself: pickle.load can execute arbitrary
# code embedded in a tampered file.
with open(Pkl_Filename, mode='rb') as pkl_in:
    Pickled_QSVC_Model = pickle.load(pkl_in)

Pickled_QSVC_Model

In [None]:
# Evaluate the reloaded model on the held-out split
score = Pickled_QSVC_Model.score(test_features, test_labels)
# Report the score as a percentage with two decimals
print(f"Test score: {100 * score:.2f} %")

In [None]:
from sklearn.svm import SVC

# Classical baseline: an SVC with default settings trained on the same split.
# fit() returns the estimator itself, so binding its result to svc keeps the
# trained model and leaves the cell without a displayed value.
svc = SVC().fit(train_features, train_labels)

In [None]:
# Score the classical baseline on the training split and on the test split
train_score_c4, test_score_c4 = (
    svc.score(features, targets)
    for features, targets in ((train_features, train_labels),
                              (test_features, test_labels))
)

print(f"Classical SVC on the training dataset: {train_score_c4:.2f}")
print(f"Classical SVC on the test dataset:     {test_score_c4:.2f}")