### Import required libraries

In [1]:
import pandas as pd
import numpy as np
import re

### Read source file(s)

In [2]:
# Load the three DTI/AAL tables (standard deviation, mean and volume).
# Forward slashes are used as the path separator: they work on Windows as well as
# on POSIX systems, and they avoid the invalid "\D" escape sequence that the
# previous backslash-separated, non-raw string literals contained.
sd = pd.read_csv("data/DTI-AAL-SD.csv")
mean = pd.read_csv("data/DTI-AAL-Mean.csv")
volume = pd.read_csv("data/DTI-AAL-Volume.csv")

### Remove rows with missing values

In [3]:
# Keep only complete rows: a row is discarded from a table as soon as any one
# of its cells is missing (the defaults of `dropna`, spelled out explicitly).
sd = sd.dropna(axis=0, how="any")
mean = mean.dropna(axis=0, how="any")
volume = volume.dropna(axis=0, how="any")

### Eliminate replicated columns

In [4]:
# Columns removed from the `sd` and `mean` tables; `volume` is left untouched,
# so it remains the only table carrying them.
columns = [
    "age",
    "gender",
    "handedness",
    "affected hemisphere",
    "post-stroke time",
    "group",
]
sd = sd.drop(labels=columns, axis=1)
mean = mean.drop(labels=columns, axis=1)

### Fix the name of the columns
- Remove the numeric suffixes that pandas appended to duplicated column names
- Prefix each column with the name of the DTI parameter it belongs to
- Prefix each column with the name of its data set, so that the columns stay distinguishable after the merge performed below

In [5]:
# Parameter prefixes, in the order in which their column groups are expected
# to appear in the tables.
# NOTE(review): "omid" does not look like a DTI parameter; it appears to serve as
# a trailing sentinel that kept the original `parameter_index + 1` lookup inside
# the list. It is kept so the constant is unchanged — confirm before removing.
DTI_PARAMETERS = ["FA", "MD", "AxD", "RD", "RA", "omid"]


def _qualify_columns(raw_columns, dataset_key, parameters=DTI_PARAMETERS):
    """Return cleaned, fully qualified column names for one data set.

    Parameters
    ----------
    raw_columns : iterable of str
        Column labels as read by pandas. The first label is treated as the key
        column and is only stripped of a duplicate suffix, never prefixed.
    dataset_key : str
        Name of the data set (e.g. ``"mean"`` or ``"sd"``); it is prepended to
        every column except the first.
    parameters : sequence of str
        Parameter prefixes in the order their column groups appear.

    Returns
    -------
    list of str
        One new label per input label, in the same order.
    """
    # Strip only the ".<digits>" suffix that pandas appends to duplicated
    # headers. The dot is escaped and the pattern is a raw string, so a name that
    # merely ends in a digit is left intact, and multi-digit suffixes are removed.
    names = [re.sub(r"\.\d+$", "", name) for name in raw_columns]

    parameter_index = 0
    for position in range(1, len(names)):
        next_index = parameter_index + 1
        # Bounds check: never look past the last known parameter.
        starts_next_group = (
            next_index < len(parameters)
            and names[position].startswith(parameters[next_index])
        )
        if starts_next_group:
            # This column already carries the next parameter's name, so it opens
            # that parameter's group and needs no parameter prefix of its own.
            parameter_index = next_index
        elif not names[position].startswith(parameters[parameter_index]):
            names[position] = "{0}_{1}".format(parameters[parameter_index], names[position])
        names[position] = "{0}_{1}".format(dataset_key, names[position])
    return names


datasets = {
    'mean': mean,
    'sd': sd
}
renamed = {}
for dataset_key, dataset in datasets.items():
    # `reset_index` returns a new frame, so the frames stored in `datasets` are
    # not mutated. Unlike rebuilding the frame from `to_numpy()`, this keeps the
    # dtype of every column.
    frame = dataset.reset_index(drop=True)
    frame.columns = _qualify_columns(dataset.columns, dataset_key)
    renamed[dataset_key] = frame

# Rebind the notebook variables explicitly instead of writing through globals().
mean = renamed['mean']
sd = renamed['sd']

### Merge the three dataframes into one comprehensive dataframe

In [6]:
# Outer-join the tables on the shared "file code" key: start from `volume`,
# attach `mean`, then attach `sd`.
df = (
    volume
    .merge(mean, how="outer", on="file code")
    .merge(sd, how="outer", on="file code")
)

### Fix data type of some columns

In [7]:
# Columns that hold whole numbers and should therefore be stored as int64.
columns = ["file code", "post-stroke time", "age"]
# `astype` converts each column in one vectorised step. It replaces
# `DataFrame.applymap`, which is deprecated in recent pandas releases and calls
# a Python function once per cell.
df[columns] = df[columns].astype(np.int64)

### Encode some columns

In [8]:
from sklearn.preprocessing import LabelEncoder

# Replace the values of each listed column with integer codes.
# Every column is encoded from its own values: previously "group" was
# overwritten with the encoding of "gender", which discarded the group labels.
encoder = LabelEncoder()
for column in ["group", "gender", "handedness", "affected hemisphere"]:
    # `fit_transform` refits the encoder on each call, so after the loop
    # `encoder` holds the classes of the last column only.
    df[column] = encoder.fit_transform(df[column])