In [None]:
import pandas as pd
import glob
import numpy as np
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib_venn import venn3
from google.colab import files
from google.colab import drive
import os

In [None]:
# Mount Google Drive at /content/drive; every input and output path used
# below is located under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# All input CSVs sit in one Drive folder and share a common file-name suffix;
# only the dataset identifier at the start of the file name differs.
DATA_DIR = "/content/drive/My Drive/Academic_Thesis/Final_Thesis/ThesisAllData"
GSE_IDS = [
    "GSE10950",
    "GSE25070",
    "GSE41328",
    "GSE74602",
    "GSE44861",
    "GSE113513",
    "GSE110223",
    "GSE8671",
    "GSE21815",
    "GSE44076",
    "GSE106582",
]

# Full path of each dataset, in the same order as GSE_IDS.
csv_files = [f"{DATA_DIR}/{gse_id}_gene_expression_renamed.csv" for gse_id in GSE_IDS]

# Load every dataset, using the first CSV column as the row index.
dfs = []
for path in csv_files:
    dfs.append(pd.read_csv(path, index_col=0))

# Print how many rows carry each value of the `target` column, per dataset.
for i, df in enumerate(dfs):
    file_name = csv_files[i].rsplit('/', 1)[-1]
    print(f"Dataset {i+1} ({file_name}):")
    print(df['target'].value_counts(), '\n')

Dataset 1 (GSE10950_gene_expression_renamed.csv):
target
0    24
1    24
Name: count, dtype: int64 

Dataset 2 (GSE25070_gene_expression_renamed.csv):
target
1    26
0    26
Name: count, dtype: int64 

Dataset 3 (GSE41328_gene_expression_renamed.csv):
target
0    10
1    10
Name: count, dtype: int64 

Dataset 4 (GSE74602_gene_expression_renamed.csv):
target
1    30
0    30
Name: count, dtype: int64 

Dataset 5 (GSE44861_gene_expression_renamed.csv):
target
1    56
0    55
Name: count, dtype: int64 

Dataset 6 (GSE113513_gene_expression_renamed.csv):
target
0    14
1    14
Name: count, dtype: int64 

Dataset 7 (GSE110223_gene_expression_renamed.csv):
target
0    13
1    13
Name: count, dtype: int64 

Dataset 8 (GSE8671_gene_expression_renamed.csv):
target
0    32
1    32
Name: count, dtype: int64 

Dataset 9 (GSE21815_gene_expression_renamed.csv):
target
1    132
0      9
Name: count, dtype: int64 

Dataset 10 (GSE44076_gene_expression_renamed.csv):
target
1    98
0    50
Name: count, d

In [None]:
# Folder on the mounted Drive that receives the per-dataset
# target-distribution PNG files.
output_dir = "/content/drive/My Drive/Academic_Thesis/Final_Thesis/Combine_CSVs_&_Make_DEG/target_distribution_plots"
# exist_ok=True keeps this cell re-runnable when the folder already exists.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Save one bar chart per dataset showing how many samples carry each target code.
# The code -> label mapping is fixed (0 -> 'Normal', 1 -> 'CRC') so that bars,
# tick labels and colours always line up, even if a dataset lacks one class.
class_codes = [0, 1]
labels = ['Normal', 'CRC']
x_positions = [0.33, 0.67]
bar_colors = ["#636EFA", "#EF553B"]

for file in csv_files:
    # Only the label column is plotted, so avoid parsing every other column
    # of the CSV a second time.
    targets = pd.read_csv(file, usecols=["target"])["target"]
    observed = targets.value_counts()

    # Any code other than 0/1 would be mislabelled on the chart; fail loudly
    # with the offending file instead of drawing a misleading figure.
    unexpected = [code for code in observed.index if code not in class_codes]
    if unexpected:
        raise ValueError(
            f"{file}: unexpected target values {unexpected}; expected only {class_codes}"
        )

    # reindex pins the order to [0, 1] and fills an absent class with 0, so
    # there are always exactly two heights for the two bar positions.
    counts = observed.reindex(class_codes, fill_value=0)

    fig, ax = plt.subplots(figsize=(4, 4))
    bars = ax.bar(x_positions, counts.values, width=0.3, color=bar_colors)
    ax.set_xticks(x_positions)
    ax.set_xticklabels(labels)
    ax.set_xlim(0, 1)

    # Write the sample count just above each bar.
    for bar, count in zip(bars, counts.values):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            count + 1,
            str(int(count)),
            ha='center',
            va='bottom',
            fontsize=10,
        )

    dataset_name = file.split("/")[-1].replace("_gene_expression_renamed.csv", "")
    ax.set_title(f"Target Distribution in {dataset_name}", fontsize=11)
    ax.set_ylabel("Number of Samples")
    # 20 % headroom leaves space for the count labels; the floor of 1 keeps
    # the axis non-degenerate when a file contains no samples at all.
    ax.set_ylim(0, max(int(counts.max()), 1) * 1.2)
    fig.tight_layout()

    # Save as PNG
    filename = os.path.join(output_dir, f"{dataset_name}_target_distribution.png")
    fig.savefig(filename, dpi=300)
    # Close explicitly so figures do not accumulate in memory across iterations.
    plt.close(fig)

print(f"All figures saved in: {output_dir}")

All figures saved in: /content/drive/My Drive/Academic_Thesis/Final_Thesis/Combine_CSVs_&_Make_DEG/target_distribution_plots
