In [None]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the CSV dataset into the working DataFrame `df`.
data_path = '../Datasets/data.csv'
df = pd.read_csv(data_path)

In [None]:
# Size of the dataset as a (rows, columns) tuple
df.shape

In [None]:
# Column labels of the loaded dataset
df.columns

In [None]:
# Datatype of each column
df.dtypes

In [None]:
# Count the missing (null) values in every column.
null_mask = df.isnull()
null_mask.sum(axis=0)

In [None]:
# Dropping unnamed columns
# Only the leading NAMED_COLUMN_COUNT columns are kept; every column to
# their right is discarded.
NAMED_COLUMN_COUNT = 172
df = df.iloc[:, :NAMED_COLUMN_COUNT]

In [None]:
# Dropping position and time columns
# Every question 1..42 has a 'Q<n>I' (position) and a 'Q<n>E' (time) column.
# All 84 labels are collected first and removed with a single drop call,
# instead of 42 separate in-place drops; this also avoids a local variable
# named `time`, which shadows the name of the standard-library module.
position_time_columns = [
    'Q' + str(i) + suffix
    for i in range(1, 43)
    for suffix in ('I', 'E')
]
df = df.drop(columns=position_time_columns)

In [None]:
# Remove the columns that are not used in the rest of the notebook.
drop_columns = [
    'engnat', 'hand', 'religion', 'orientation', 'race',
    'voted', 'married', 'major', 'country', 'screensize',
    'uniquenetworklocation', 'source', 'introelapse', 'testelapse', 'surveyelapse',
]
df = df.drop(columns=drop_columns)

In [None]:
# Dropping the questions that are not kept for the DASS21 scale.
# This cell only removes columns; the kept questions are renamed and
# reordered in the cells that follow.
Q_to_drop = ['1', '5', '7', '9', '11', '12', '15', '16', '18', '19', '21', '22', '23', '24', '26', '27', '30', '34', '35', '36', '37']
# Answer columns are named 'Q<n>A'. Drop them all in one call rather than
# one in-place drop per question.
df = df.drop(columns=['Q' + i + 'A' for i in Q_to_drop])

In [None]:
# Mapping questions
# q1[k] is the original question number that becomes question k + 1 after
# the renaming (e.g. original Q29 becomes Q1).
q1 = [29, 2, 3, 4, 42, 6, 41, 33, 40, 10, 39, 8, 13, 32, 28, 31, 17, 14, 25, 20, 38]
q_map = {str(key): str(i + 1) for i, key in enumerate(q1)}
# Tag of each renamed question, in new-question order. The tags select the
# columns summed into the 'Str' (S), 'Anx' (A) and 'Dep' (D) scores later on.
dass = ['S', 'A', 'D', 'A', 'D', 'S', 'A', 'S', 'A', 'D', 'S', 'S', 'D', 'S', 'A', 'D', 'D', 'S', 'A', 'A', 'D']

# Old answer-column label -> new label, e.g. 'Q29A' -> 'Q1(S)'.
rename_map = {
    'Q' + key + 'A': 'Q' + value + '(' + d + ')'
    for (key, value), d in zip(q_map.items(), dass)
}

# Recode only the columns that still carry their old label, so that running
# this cell a second time cannot shift the answers twice.
columns_to_recode = [new for old, new in rename_map.items() if old in df.columns]
df = df.rename(columns=rename_map)

# Shift the answers from 1..4 to 0..3. replace() returns a new object, so
# the result has to be assigned back; the original call discarded it and
# left the answers unchanged.
if columns_to_recode:
    df[columns_to_recode] = df[columns_to_recode].replace([1, 2, 3, 4], [0, 1, 2, 3])

In [None]:
# Reindexing columns
# Put the renamed question columns first, in question order (Q1 .. Q21),
# followed by every other column in its current order.
# The labels are rebuilt as 'Q<new number>(<tag>)' from q_map and dass.
# Comparing the full column labels against q_map.values() (the bare numbers
# '1'..'21') never matches, which made the original reindex a no-op.
question_columns = [
    'Q' + value + '(' + d + ')'
    for value, d in zip(q_map.values(), dass)
]
# Keep only labels that really exist so reindex cannot add all-NaN columns.
question_columns = [col for col in question_columns if col in df.columns]
other_columns = [col for col in df.columns if col not in question_columns]
df = df.reindex(columns=question_columns + other_columns)

In [None]:
# Calculating scores
# Each score is the row-wise sum of the columns whose label contains the
# given tag, multiplied by 2.
# NOTE(review): the factor 2 is presumably the usual DASS-21 doubling - confirm.
for score_name, tag in (('Str', '(S)'), ('Anx', '(A)'), ('Dep', '(D)')):
    tagged_columns = [col for col in df.columns if tag in col]
    df[score_name] = df[tagged_columns].sum(axis=1) * 2