### Example 02

Establishment and characterization of persistent *Pseudomonas aeruginosa* infections in air–liquid interface cultures of human airway epithelial cells.

doi: https://doi.org/10.1128/iai.00603-24

**Ref:** Bouheraoua S, Cleeves S, Preusse M, Müsken M, Braubach P, Fuchs M, Falk C, Sewald K, Häussler S. 2025. Establishment and characterization of persistent Pseudomonas aeruginosa infections in air–liquid interface cultures of human airway epithelial cells. Infect Immun 93:e00603-24.
https://doi.org/10.1128/iai.00603-24

- This example uses the "Calu-3 PAO1 Day 5 vs Inoculum" comparison.

Objective: extract the background, upregulated and downregulated gene lists for functional enrichment analysis.

In [43]:
## Load the comparison sheet from the supplementary workbook
import pandas as pd

SUPPLEMENT_XLSX = "iai.00603-24-s0006.xlsx"
COMPARISON_SHEET = 'Calu-3 PAO1 Day 5 vs Inoculum'
df = pd.read_excel(SUPPLEMENT_XLSX, sheet_name=COMPARISON_SHEET)

In [44]:
# The real column names sit in row 0 of the frame as read, so promote that
# row to the header and drop it from the data.
# The guard makes the cell safe to re-run: without it, a second execution
# would consume a genuine gene row as the new header.
if "Locus Tag" not in df.columns:
    new_header = df.iloc[0]
    # .copy() detaches the result from the frame it was sliced from, so later
    # column assignments do not act on a slice.
    df = df.iloc[1:].copy()
    df.columns = new_header
    # Columns that held the header text alongside numbers were read as
    # object dtype; infer_objects() restores numeric dtypes where possible.
    df = df.infer_objects()
df

Unnamed: 0,Locus Tag,Gene name,log2FC,FDR
1,PA4096,,8.812327,0.000172
2,PA1541,,6.55187,0.000007
3,PA1540,,6.255027,0.000002
4,PA3391,nosR,5.74318,0.000149
5,PA5471,armZ,5.629825,0.0
...,...,...,...,...
1465,PA0985,pyoS5,-4.482265,0.000044
1466,PA4710,phuR,-4.543053,0.000031
1467,PA1337,ansB,-4.695082,0.0
1468,PA2398,fpvA,-4.858298,0.000157


In [45]:
# Number of rows (genes) in the table
df.shape[0]

1469

In [46]:
## Count rows whose Locus Tag (the gene id) is missing
int(df["Locus Tag"].isna().sum())

0

In [5]:
## Some locus tags end with ".1" (e.g. PA0805.1, PA5471.1).
## The KEGG lookups performed later in this notebook fail for such ids (they show up in not_found_list).
## NOTE(review): the original note assumed ".1" marks a variation of a transcript of the base locus;
## it may instead denote a separately annotated gene -- confirm before merging it into the base id.
## The line below would strip the ".1" ending (duplicates created that way would then have to be dropped).
## It is currently commented out, so ".1" ids are kept unchanged in every list produced below.
# df["Locus Tag"] = df["Locus Tag"].apply(lambda gene: gene.split(".")[0] if isinstance(gene, str) and gene.endswith(".1") else gene)

In [47]:
# Number of rows whose Locus Tag repeats an earlier row
int(df["Locus Tag"].duplicated().sum())

0

#### Background genes

All genes detected in the RNA-Seq analysis, without duplicates

In [48]:
# Order-preserving de-duplication of the locus tags.
# dict.fromkeys keeps the first occurrence of each key in insertion order,
# replacing the quadratic "if gene not in list" scan with a linear pass.
genes_background = list(dict.fromkeys(df["Locus Tag"]))
len(genes_background)

1469

In [49]:
# Write one locus tag per line. The encoding is stated explicitly so the file
# is written with the same codec (UTF-8) that is used when it is read back,
# instead of depending on the platform default.
with open('background_ex2.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{gene}\n" for gene in genes_background)

In [50]:
# Print the in-memory count, then the number of lines found on disk,
# so the two figures can be compared.
print(len(genes_background))
with open("background_ex2.txt", mode="r", encoding="utf-8") as f:
    lines = list(f)
print(len(lines))

1469
1469


#### Upregulated genes

All genes with log2FC > 1 and FDR < 0.05

In [51]:
# Keep rows that pass both thresholds: log2FC above 1 and FDR below 0.05
is_induced = df['log2FC'].gt(1)
is_significant = df['FDR'].lt(0.05)
upregulated = df[is_induced & is_significant]
upregulated

Unnamed: 0,Locus Tag,Gene name,log2FC,FDR
1,PA4096,,8.812327,0.000172
2,PA1541,,6.55187,0.000007
3,PA1540,,6.255027,0.000002
4,PA3391,nosR,5.74318,0.000149
5,PA5471,armZ,5.629825,0.0
...,...,...,...,...
727,PA3200,,1.005837,0.03461
728,PA2251,,1.005567,0.020637
729,PA3816,cysE,1.005385,0.029681
730,PA2464,,1.001728,0.040561


In [52]:
# Number of upregulated rows whose Locus Tag repeats an earlier row
int(upregulated["Locus Tag"].duplicated().sum())

0

In [53]:
# Keep the first row for every Locus Tag, then report how many rows remain
upregulated = upregulated.drop_duplicates(subset=['Locus Tag'], keep='first')
upregulated.shape[0]

731

In [54]:
# Write one upregulated locus tag per line. The encoding is stated explicitly
# so the file is written with the same codec (UTF-8) that is used when it is
# read back, instead of depending on the platform default.
with open('upregulated_ex2.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{gene}\n" for gene in upregulated["Locus Tag"])

In [55]:
# Print the number of rows in memory, then the number of lines found on disk,
# so the two figures can be compared.
print(len(upregulated))
with open("upregulated_ex2.txt", mode="r", encoding="utf-8") as f:
    lines = list(f)
print(len(lines))

731
731


##### Upregulated gene names

In [56]:
# Number of upregulated rows that have no gene name
int(upregulated["Gene name"].isna().sum())

481

In [57]:
from ResPathExplorer.mapper_KeggFunctions import get_gene_name_by_kegg_id

# Locus tags of upregulated genes that have no gene name in the table
list_gene_ids = upregulated.loc[upregulated["Gene name"].isna(), "Locus Tag"].tolist()

dict_g = {}          # locus tag -> name returned by the lookup ("" when nothing was returned)
not_found_list = []  # locus tags whose lookup raised an exception
lookup_errors = {}   # locus tag -> repr of the exception, kept for diagnosis

for id_g in list_gene_ids:
    # The lookup is keyed by "pae:<locus tag>" (presumably the KEGG organism
    # prefix for this strain -- verify against the helper's documentation).
    id_go = "pae:" + id_g

    try:
        name = get_gene_name_by_kegg_id(id_go)
    except Exception as exc:
        # Best effort: a single failed lookup must not abort the batch, but the
        # reason is recorded instead of being silently discarded.
        not_found_list.append(id_g)
        lookup_errors[id_g] = repr(exc)
    else:
        # Falsy results (None, "") are normalised to the empty string
        dict_g[id_g] = name or ""


In [58]:
# Show the locus tag -> gene name mapping built by the lookup loop
# (an empty string means the lookup returned no name for that locus tag)
dict_g

{'PA4096': '',
 'PA1541': '',
 'PA1540': '',
 'PA2135': '',
 'PA2260': '',
 'PA2261': '',
 'PA0882': '',
 'PA3445': '',
 'PA5470': '',
 'PA4130': '',
 'PA4131': '',
 'PA5460': '',
 'PA0711': '',
 'PA2262': '',
 'PA2214': '',
 'PA4103': '',
 'PA3718': '',
 'PA2472': '',
 'PA2022': '',
 'PA3431': '',
 'PA1981': '',
 'PA2712': '',
 'PA0738': '',
 'PA2288': '',
 'PA1238': '',
 'PA4133': '',
 'PA1190': '',
 'PA4129': '',
 'PA0737': '',
 'PA2069': '',
 'PA0613': '',
 'PA4674': '',
 'PA3444': '',
 'PA2882': '',
 'PA3432': '',
 'PA1425': '',
 'PA1230': '',
 'PA4880': '',
 'PA1220': '',
 'PA2148': '',
 'PA2803': '',
 'PA2068': '',
 'PA2312': '',
 'PA2339': '',
 'PA2563': '',
 'PA0939': '',
 'PA4106': '',
 'PA1029': '',
 'PA1873': '',
 'PA3449': '',
 'PA5106': '',
 'PA4824': '',
 'PA4635': '',
 'PA2724': '',
 'PA4985': '',
 'PA2062': '',
 'PA4321': '',
 'PA4592': '',
 'PA3730': '',
 'PA1265': '',
 'PA2122': '',
 'PA0842': '',
 'PA4995': '',
 'PA4140': '',
 'PA0805': '',
 'PA1219': '',
 'PA5524':

In [59]:
# Show the locus tags for which the lookup call raised an exception
not_found_list

['PA1427',
 'PA0805.1',
 'PA1426',
 'PA5471.1',
 'PA3991',
 'PA0852.1',
 'PA0708.1',
 'PA3218',
 'PA0717',
 'PA0980',
 'PA4028',
 'PA3090']

In [61]:
# Keep only the locus tags for which a non-empty gene name was obtained
filtered_dict_g = {}
for locus_tag, gene_name in dict_g.items():
    if gene_name != "":
        filtered_dict_g[locus_tag] = gene_name
filtered_dict_g

{}

In [62]:
# Fill in the gene names recovered by the lookup; every other row keeps the
# name it already had (possibly missing).
# Vectorised replacement for the row-wise apply: map() yields NaN for locus
# tags absent from filtered_dict_g, and where() keeps the existing name in
# exactly those positions. assign() returns a new frame rather than writing
# into one that was obtained by filtering df.
kegg_names = upregulated['Locus Tag'].map(filtered_dict_g)
upregulated = upregulated.assign(
    **{'Gene name': upregulated['Gene name'].where(kegg_names.isna(), kegg_names)}
)

In [83]:
# Export the upregulated table to an Excel workbook in the working directory.
# No index argument is passed, so pandas' default for writing the index applies.
upregulated.to_excel("df_genesup.xlsx")

In [65]:
# Distinct, non-missing gene names in order of first appearance
named_genes = upregulated['Gene name'].dropna()
gene_list = named_genes.drop_duplicates().tolist()
len(gene_list)

250

In [66]:
# Write one gene name per line. The encoding is stated explicitly so the file
# is written with the same codec (UTF-8) that is used when it is read back,
# instead of depending on the platform default.
with open('upregulatedGName_ex2.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{gene}\n" for gene in gene_list)

In [67]:
# Read the file back and print how many lines it contains
with open("upregulatedGName_ex2.txt", mode="r", encoding="utf-8") as f:
    lines = list(f)
print(len(lines))

250


#### Downregulated genes

All genes with log2FC < -1 and FDR < 0.05

In [68]:
# Keep rows that pass both thresholds: log2FC below -1 and FDR below 0.05
is_repressed = df['log2FC'].lt(-1)
is_significant = df['FDR'].lt(0.05)
downregulated = df[is_repressed & is_significant]
downregulated

Unnamed: 0,Locus Tag,Gene name,log2FC,FDR
776,PA0750,ung,-1.00362,0.022402
777,PA0344,,-1.007147,0.040282
778,PA4048,,-1.012717,0.023763
779,PA4546,pilS,-1.014871,0.023626
780,PA3686,adk,-1.017409,0.024234
...,...,...,...,...
1465,PA0985,pyoS5,-4.482265,0.000044
1466,PA4710,phuR,-4.543053,0.000031
1467,PA1337,ansB,-4.695082,0.0
1468,PA2398,fpvA,-4.858298,0.000157


In [69]:
# Number of downregulated rows whose Locus Tag repeats an earlier row
int(downregulated["Locus Tag"].duplicated().sum())

0

In [70]:
# Keep the first row for every Locus Tag, then report how many rows remain
downregulated = downregulated.drop_duplicates(subset=['Locus Tag'], keep='first')
downregulated.shape[0]

694

In [71]:
# Write one downregulated locus tag per line. The encoding is stated
# explicitly so the file is written with the same codec (UTF-8) that is used
# when it is read back, instead of depending on the platform default.
with open('downregulated_ex2.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{gene}\n" for gene in downregulated["Locus Tag"])

In [72]:
# Print the number of rows in memory, then the number of lines found on disk,
# so the two figures can be compared.
print(len(downregulated))
with open("downregulated_ex2.txt", mode="r", encoding="utf-8") as f:
    lines = list(f)
print(len(lines))

694
694


#### Downregulated gene names

In [73]:
# Number of downregulated rows that have no gene name
int(downregulated["Gene name"].isna().sum())

299

In [74]:
# Locus tags of downregulated genes that have no gene name in the table
list_gene_ids = downregulated.loc[downregulated["Gene name"].isna(), "Locus Tag"].tolist()

dict_g = {}          # locus tag -> name returned by the lookup ("" when nothing was returned)
not_found_list = []  # locus tags whose lookup raised an exception
lookup_errors = {}   # locus tag -> repr of the exception, kept for diagnosis

for id_g in list_gene_ids:
    # The lookup is keyed by "pae:<locus tag>" (presumably the KEGG organism
    # prefix for this strain -- verify against the helper's documentation).
    id_go = "pae:" + id_g

    try:
        name = get_gene_name_by_kegg_id(id_go)
    except Exception as exc:
        # Best effort: a single failed lookup must not abort the batch, but the
        # reason is recorded instead of being silently discarded.
        not_found_list.append(id_g)
        lookup_errors[id_g] = repr(exc)
    else:
        # Falsy results (None, "") are normalised to the empty string
        dict_g[id_g] = name or ""

In [75]:
# Show the locus tag -> gene name mapping built by the lookup loop
# (an empty string means the lookup returned no name for that locus tag)
dict_g

{'PA0344': '',
 'PA4048': '',
 'PA5006': '',
 'PA2668': '',
 'PA4796': '',
 'PA3768': '',
 'PA4389': 'speA',
 'PA4558': '',
 'PA1597': '',
 'PA1033': '',
 'PA3611': '',
 'PA5003': '',
 'PA3001': '',
 'PA0659': '',
 'PA0168': '',
 'PA5455': '',
 'PA3804': '',
 'PA5551': '',
 'PA3110': '',
 'PA3224': '',
 'PA1181': '',
 'PA0115': '',
 'PA0602': '',
 'PA1533': '',
 'PA1504': '',
 'PA3286': '',
 'PA0475': '',
 'PA1222': '',
 'PA3262': '',
 'PA4426': '',
 'PA3139': '',
 'PA3020': '',
 'PA4486': '',
 'PA0285': '',
 'PA5310': '',
 'PA4473': '',
 'PA3239': '',
 'PA3822': '',
 'PA0559': '',
 'PA1160': '',
 'PA3983': '',
 'PA0150': '',
 'PA5226': '',
 'PA4595': '',
 'PA4907': '',
 'PA3440': '',
 'PA0502': '',
 'PA0955': '',
 'PA4636': '',
 'PA2575': '',
 'PA4736': '',
 'PA3694': '',
 'PA1615': '',
 'PA0801': '',
 'PA0847': '',
 'PA3764': '',
 'PA4452': '',
 'PA0777': '',
 'PA0930': '',
 'PA4336': '',
 'PA0646': '',
 'PA3697': '',
 'PA3833': '',
 'PA4011': '',
 'PA1023': '',
 'PA4010': '',
 'PA53

In [76]:
# Drop the entries whose gene name is the empty string
filtered_dict_g = dict(
    (locus_tag, gene_name)
    for locus_tag, gene_name in dict_g.items()
    if not gene_name == ""
)
filtered_dict_g

{'PA4389': 'speA'}

In [77]:
# Fill in the gene names recovered by the lookup; every other row keeps the
# name it already had (possibly missing).
# Vectorised replacement for the row-wise apply: map() yields NaN for locus
# tags absent from filtered_dict_g, and where() keeps the existing name in
# exactly those positions. assign() returns a new frame rather than writing
# into one that was obtained by filtering df.
kegg_names = downregulated['Locus Tag'].map(filtered_dict_g)
downregulated = downregulated.assign(
    **{'Gene name': downregulated['Gene name'].where(kegg_names.isna(), kegg_names)}
)

In [78]:
# Distinct, non-missing gene names in order of first appearance
named_genes = downregulated['Gene name'].dropna()
gene_list = named_genes.drop_duplicates().tolist()
len(gene_list)

396

In [79]:
# Write one gene name per line. The encoding is stated explicitly so the file
# is written with the same codec (UTF-8) that is used when it is read back,
# instead of depending on the platform default.
with open('downregulatedGName_ex2.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{gene}\n" for gene in gene_list)

In [80]:
# Read the file back and print how many lines it contains
with open("downregulatedGName_ex2.txt", mode="r", encoding="utf-8") as f:
    lines = list(f)
print(len(lines))

396


In [84]:
# Export the downregulated table to an Excel workbook in the working directory.
# No index argument is passed, so pandas' default for writing the index applies.
downregulated.to_excel("df_genesdown.xlsx")