In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns   


In [2]:

file_path = 'Pain-Dataset\Demographics_Questionnaires_Dataset_v4.xlsx'
df = pd.read_excel(file_path)


In [3]:
# Display basic information
print("Shape of the DataFrame:", df.shape)
print("Columns:", df.columns)



Shape of the DataFrame: (40, 12)
Columns: Index(['ID', 'Pain Score (Actual Pain of Brief Pain Inventory)', 'Age',
       'Gender', 'Etiology of NP*', 'Time with NP', 'Medical treatment for NP',
       'Since when have you use the previous medication?',
       'Have you had any medical procedures to control the pain?',
       'Do you go regularly to phsychological or emotional counseling sessions for your pain?',
       'Do you suffer from any neurological disorder? (eg. epilepsy, Alzheimer, tinnitus)',
       'Have you suffered any head traumatism? '],
      dtype='object')


In [4]:
df.head()

Unnamed: 0,ID,Pain Score (Actual Pain of Brief Pain Inventory),Age,Gender,Etiology of NP*,Time with NP,Medical treatment for NP,Since when have you use the previous medication?,Have you had any medical procedures to control the pain?,Do you go regularly to phsychological or emotional counseling sessions for your pain?,"Do you suffer from any neurological disorder? (eg. epilepsy, Alzheimer, tinnitus)",Have you suffered any head traumatism?
0,0,7.0,25.0,F,Central Nervous System Disorder (CRPS or Lyme),More than 2 years,"Pregabalin, amitriptyline",More than a year ago,Nerve blocks and infusions*,Yes,No,
1,1,4.0,57.0,F,Diabetes,More than 2 years,Tramadol,More than a month ago,,No,No,
2,2,3.0,20.0,F,Peripheral neuropathy,More than 2 years,Keterolac,More than a year ago,,No,No,
3,3,8.0,34.0,F,Spinal cord or nerve root injury,More than 2 years,Tramadol,More than a year ago,Physiotherapy,No,No,
4,4,5.0,77.0,M,Spinal cord or nerve root injury,More than 2 years,,,,No,No,Yes - more than 20 years ago


In [5]:
# Additional info
print("\nBasic Info (Info):")
df.info()


Basic Info (Info):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 12 columns):
 #   Column                                                                                 Non-Null Count  Dtype  
---  ------                                                                                 --------------  -----  
 0   ID                                                                                     39 non-null     object 
 1   Pain Score (Actual Pain of Brief Pain Inventory)                                       36 non-null     float64
 2   Age                                                                                    36 non-null     float64
 3   Gender                                                                                 36 non-null     object 
 4   Etiology of NP*                                                                        36 non-null     object 
 5   Time with NP                                                

In [6]:
# Total null values in each column
null_counts = df.isnull().sum()

# Display the result
print("Null values in each column:")
print(null_counts)


Null values in each column:
ID                                                                                        1
Pain Score (Actual Pain of Brief Pain Inventory)                                          4
Age                                                                                       4
Gender                                                                                    4
Etiology of NP*                                                                           4
Time with NP                                                                              4
Medical treatment for NP                                                                 12
Since when have you use the previous medication?                                         10
Have you had any medical procedures to control the pain?                                 26
Do you go regularly to phsychological or emotional counseling sessions for your pain?     4
Do you suffer from any neurological disorder? (eg. e

In [7]:
# Percentage of null values in each column
null_percentage = (df.isnull().sum() / len(df)) * 100

# Display the result
print("Percentage of null values in each column:")
print(null_percentage)


Percentage of null values in each column:
ID                                                                                        2.5
Pain Score (Actual Pain of Brief Pain Inventory)                                         10.0
Age                                                                                      10.0
Gender                                                                                   10.0
Etiology of NP*                                                                          10.0
Time with NP                                                                             10.0
Medical treatment for NP                                                                 30.0
Since when have you use the previous medication?                                         25.0
Have you had any medical procedures to control the pain?                                 65.0
Do you go regularly to phsychological or emotional counseling sessions for your pain?    10.0
Do you suffer from

In [9]:
# Create dictionary with ID as key and Pain Score as value
pain_score_dict = dict(zip(
    df['ID'],
    df['Pain Score (Actual Pain of Brief Pain Inventory)']
))

# Display a few entries to verify
for k, v in list(pain_score_dict.items()):  
    print(f"ID: {k} -> Pain Score: {v}")

ID: 0 -> Pain Score: 7.0
ID: 1 -> Pain Score: 4.0
ID: 2 -> Pain Score: 3.0
ID: 3 -> Pain Score: 8.0
ID: 4 -> Pain Score: 5.0
ID: 5 -> Pain Score: 2.0
ID: 6 -> Pain Score: 7.0
ID: 7 -> Pain Score: 3.0
ID: 8 -> Pain Score: 4.0
ID: 9 -> Pain Score: 9.0
ID: 10 -> Pain Score: 3.0
ID: 11 -> Pain Score: 6.0
ID: 13 -> Pain Score: 3.0
ID: 14 -> Pain Score: 3.0
ID: 15 -> Pain Score: 8.0
ID: 16 -> Pain Score: 5.0
ID: 18 -> Pain Score: 5.0
ID: 19 -> Pain Score: 8.0
ID: 20 -> Pain Score: 7.0
ID: 21 -> Pain Score: 6.0
ID: 22 -> Pain Score: 7.0
ID: 23 -> Pain Score: 6.0
ID: 24 -> Pain Score: 9.0
ID: 25 -> Pain Score: 8.0
ID: 26 -> Pain Score: 0.0
ID: 27 -> Pain Score: 1.0
ID: 30 -> Pain Score: 3.0
ID: 31 -> Pain Score: 9.0
ID: 33 -> Pain Score: 6.0
ID: 35 -> Pain Score: 1.0
ID: 37 -> Pain Score: 0.0
ID: 38 -> Pain Score: 7.0
ID: 39 -> Pain Score: 8.0
ID: 40 -> Pain Score: 4.0
ID: 41 -> Pain Score: 7.0
ID: 43 -> Pain Score: 6.0
ID: nan -> Pain Score: nan
ID: *NP is neuropathic pain  -> Pain Score: nan

In [10]:
# Get unique values and their counts
value_counts = df['Pain Score (Actual Pain of Brief Pain Inventory)'].value_counts(dropna=False)

# Display the result
print("Unique values and their counts:")
print(value_counts)


Unique values and their counts:
Pain Score (Actual Pain of Brief Pain Inventory)
7.0    6
3.0    6
8.0    5
6.0    5
NaN    4
4.0    3
5.0    3
9.0    3
0.0    2
1.0    2
2.0    1
Name: count, dtype: int64
