In [2]:
#week2_day1_pandas_intro

import pandas as pd
print(pd.__version__)

# Assemble a DataFrame from three equal-length columns; printing it shows the default 0..N-1 row labels
data = dict(
    Gene=["APP", "MAPT", "SNCA", "GRN", "C9orf72"],
    Expression=[14.2, 9.8, 16.3, 8.7, 10.9],
    Condition=["AD", "AD", "PD", "FTD", "FTD"],
)

df = pd.DataFrame(data)
print(df, end="\n\n")

# Handy inspection calls (kept commented out as reference notes):
#   df.head()      -> first 5 rows
#   df.tail(2)     -> last 2 rows
#   df.info()      -> structure, dtypes, and non-null counts
#   df.describe()  -> summary statistics
#
# Row filtering with boolean masks:
#   df[df["Expression"] > 10]
#   df[(df["Expression"] > 10) & (df["Condition"] == "AD")]

# Add a boolean column that is True for rows whose Expression exceeds 10
df["High"] = df["Expression"].gt(10)
#   print(df["High"])

# Dropping (deleting) a column:
#   df = df.drop(columns=["Condition"])

# Sorting a whole DataFrame by column values. sort_values accepts:
#   by="name", axis (0 = rows, 1 = columns), ascending=True,
#   inplace=False (returns a new sorted frame and leaves the original alone),
#   kind="quicksort", na_position="last" (where missing values are placed)
# The original row labels travel with their rows, so the index is out of order after this step.
df = df.sort_values(by="Expression", ascending=False)
print(df, end="\n\n")

# reset_index relabels the rows 0..N-1 in the current (sorted) order;
# drop=True discards the old labels instead of keeping them as a new column
df = df.reset_index(drop=True)
print(df, end="\n\n")

# Round-trip the frame through a CSV file: to_csv writes it to the absolute path below
# (index=False leaves the row labels out of the file), then read_csv loads it back.
gene_csv_path = "E:/DATA_SCIENCE/Projects/gene_expression.csv"
df.to_csv(gene_csv_path, index=False)
df2 = pd.read_csv(gene_csv_path)
print("Saved DataFrame from .csv file:", df2, sep="\n")



2.3.2
      Gene  Expression Condition
0      APP        14.2        AD
1     MAPT         9.8        AD
2     SNCA        16.3        PD
3      GRN         8.7       FTD
4  C9orf72        10.9       FTD

      Gene  Expression Condition   High
2     SNCA        16.3        PD   True
0      APP        14.2        AD   True
4  C9orf72        10.9       FTD   True
1     MAPT         9.8        AD  False
3      GRN         8.7       FTD  False

      Gene  Expression Condition   High
0     SNCA        16.3        PD   True
1      APP        14.2        AD   True
2  C9orf72        10.9       FTD   True
3     MAPT         9.8        AD  False
4      GRN         8.7       FTD  False

Saved DataFrame from .csv file:
      Gene  Expression Condition   High
0     SNCA        16.3        PD   True
1      APP        14.2        AD   True
2  C9orf72        10.9       FTD   True
3     MAPT         9.8        AD  False
4      GRN         8.7       FTD  False


In [22]:
# Day 4 Challenge — “Mock qPCR Gene Expression Analysis”
# Goal:
# load a small dataset of mock qPCR results, clean it, analyze differential expression, and export a summary report using Pandas.

import numpy as np
import pandas as pd
print(pd.__version__)

# Mock qPCR input: one row per gene with its Ct value under control and treated conditions
PCRdata = {
    "Gene": ["APP", "MAPT", "SNCA", "GRN", "C9orf72"],
    "Ct_Control": [23.1, 26.4, 24.9, 27.5, 29.1],
    "Ct_Treated": [21.8, 25.7, 26.2, 26.0, 27.9]
}
PCRdf = pd.DataFrame(PCRdata)
print(PCRdf)
print()

# Compute ΔCt and ΔΔCt
# dCt         = Ct_Treated - Ct_Control, per gene
# mean_dCt    = mean of dCt over all genes (a scalar broadcast to every row)
# ddCt        = dCt - mean_dCt
# fold_change = 2 ** (-ddCt)
# NOTE(review): the baseline here is the across-gene mean dCt, not a reference/housekeeping
# gene — confirm this is the intended normalisation.
PCRdf["dCt"] = PCRdf["Ct_Treated"] - PCRdf["Ct_Control"]
PCRdf["mean_dCt"] = PCRdf["dCt"].mean()
PCRdf["ddCt"] = PCRdf["dCt"] - PCRdf["mean_dCt"]
PCRdf["fold_change"] = np.power(2, -PCRdf["ddCt"])   # np.power(base, exponent), element-wise

print("4 new columns added: ")
print(PCRdf)
print()

# Boolean flags for fold changes above 1.2 or below 0.8
PCRdf["High"] = PCRdf["fold_change"] > 1.2
PCRdf["Low"] = PCRdf["fold_change"] < 0.8

print("Check for high or low fold change: ")
print(PCRdf)
print()

# Largest fold change first, then relabel the rows 0..N-1 in that order
PCRdf = PCRdf.sort_values(by="fold_change", ascending=False).reset_index(drop=True)

# Classify direction from the unrounded values so that a fold change such as 1.004
# is still reported as "up" rather than collapsing to "no change" after rounding
PCRdf["regulation"] = np.where(
    PCRdf["fold_change"] > 1, "up",
    np.where(PCRdf["fold_change"] < 1, "down", "no change")
)

# Series.round returns a new Series; the result must be assigned back or the column
# stays unrounded (the bare call previously here had no effect)
PCRdf["fold_change"] = PCRdf["fold_change"].round(2)

# Keep "regulation" as the last column, matching the original column order
PCRdf = PCRdf[[c for c in PCRdf.columns if c != "regulation"] + ["regulation"]]

# A Styler only renders when it is the final expression of a cell or is passed to display().
# As a bare expression in the middle of the cell it was evaluated and silently discarded,
# so nothing appeared under the heading. display() is provided by the IPython/Jupyter kernel.
print("Sorted by fold change and re-indexed: ")
display(PCRdf.style.background_gradient(cmap="coolwarm"))
print()

# Round-trip the qPCR table through a CSV file: write it without the row labels, then read
# it back. Despite its name, PCRdf_filtered is the full reloaded table; no rows are filtered.
qpcr_csv_path = "E:/DATA_SCIENCE/Projects/qPCR_gene_expression.csv"
PCRdf.to_csv(qpcr_csv_path, index=False)
PCRdf_filtered = pd.read_csv(qpcr_csv_path)
print("Saved qPCR DataFrame from .csv file:", PCRdf_filtered, sep="\n")


2.3.2
      Gene  Ct_Control  Ct_Treated
0      APP        23.1        21.8
1     MAPT        26.4        25.7
2     SNCA        24.9        26.2
3      GRN        27.5        26.0
4  C9orf72        29.1        27.9

4 new columns added: 
      Gene  Ct_Control  Ct_Treated  dCt  mean_dCt  ddCt  fold_change
0      APP        23.1        21.8 -1.3     -0.68 -0.62     1.536875
1     MAPT        26.4        25.7 -0.7     -0.68 -0.02     1.013959
2     SNCA        24.9        26.2  1.3     -0.68  1.98     0.253490
3      GRN        27.5        26.0 -1.5     -0.68 -0.82     1.765406
4  C9orf72        29.1        27.9 -1.2     -0.68 -0.52     1.433955

Check for high or low fold change: 
      Gene  Ct_Control  Ct_Treated  dCt  mean_dCt  ddCt  fold_change   High  \
0      APP        23.1        21.8 -1.3     -0.68 -0.62     1.536875   True   
1     MAPT        26.4        25.7 -0.7     -0.68 -0.02     1.013959  False   
2     SNCA        24.9        26.2  1.3     -0.68  1.98     0.253490  Fal