In [1]:
import pandas as pd

## Read in data

First, read the file in with one row per line of text; then split each line on the tab delimiter and expand the pieces out into separate columns.

In [23]:
# Load the file as one text line per row in a single column labelled 0.
# Recent pandas versions reject `sep='\n'` in `read_csv` (the line terminator
# cannot be used as the field separator), so the lines are read directly.
# The file is read as plain text: no CSV quoting or NA inference is applied.
# Empty lines are skipped.
with open('output.txt', encoding='utf-8') as raw_file:
    raw_lines = [line.rstrip('\n') for line in raw_file]
df = pd.DataFrame({0: [line for line in raw_lines if line]})

In [24]:
# Break every raw line (held in column 0) on tab characters, giving each
# field its own column.
raw_lines = df[0]
df = raw_lines.str.split('\t', expand=True)

In [25]:
# Preview the first 20 rows of the split frame.
df.head(n=20)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,FlyBase ID,FBgn0053503,,,,,,,,,,
1,Annotation Symbol,CG33503,,,,,,,,,,
2,Symbol,Cyp12d1-d,,,,,,,,,,
3,Name,Cyp12d1-d,,,,,,,,,,
4,,Adult Male,,,Adult Female,,,Male v. Female,,Larval,,
5,Tissue,FPKM,SD,Enrichment,FPKM,SD,Enrichment,M/F,p value,FPKM,SD,Enrichment
6,Whole body,0.0,0,-1,0.0,0,-1,1.00,n.s.,0.0,0,-1
7,Head,0.0,0,-1,0.0,0,-1,1.00,n.s.,-,-,-
8,Eye,0.0,0,-1,0.0,0,-1,1.00,n.s.,-,-,-
9,Brain / CNS,0.0,0,-1,0.0,0,-1,1.00,n.s.,0.0,0,-1


Rename columns

In [26]:
# The first column holds the row label; every other column is named
# "<group> <measure>", in the left-to-right order of the split fields.
measures_by_group = [
    ("Adult Male", ("FPKM", "SD", "Enrichment")),
    ("Adult Female", ("FPKM", "SD", "Enrichment")),
    ("Male v. Female", ("M/F", "p value")),
    ("Larval", ("FPKM", "SD", "Enrichment")),
]
column_names = ["Tissue"] + [
    f"{group} {measure}"
    for group, measures in measures_by_group
    for measure in measures
]
df.columns = column_names

In [32]:
# Check that the new column labels were applied.
df.head(n=5)

Unnamed: 0,Tissue,Adult Male FPKM,Adult Male SD,Adult Male Enrichment,Adult Female FPKM,Adult Female SD,Adult Female Enrichment,Male v. Female M/F,Male v. Female p value,Larval FPKM,Larval SD,Larval Enrichment
0,FlyBase ID,FBgn0053503,,,,,,,,,,
1,Annotation Symbol,CG33503,,,,,,,,,,
2,Symbol,Cyp12d1-d,,,,,,,,,,
3,Name,Cyp12d1-d,,,,,,,,,,
4,,Adult Male,,,Adult Female,,,Male v. Female,,Larval,,


### Remove errors

In [33]:
# Discard rows whose first field is the literal error message.
is_error_row = df["Tissue"] == "An error has occurred."
df = df[~is_error_row]

In [34]:
# Keep only the rows that have a non-null value in the Tissue column.
has_label = df["Tissue"].notnull()
df = df[has_label]
df.head()

Unnamed: 0,Tissue,Adult Male FPKM,Adult Male SD,Adult Male Enrichment,Adult Female FPKM,Adult Female SD,Adult Female Enrichment,Male v. Female M/F,Male v. Female p value,Larval FPKM,Larval SD,Larval Enrichment
0,FlyBase ID,FBgn0053503,,,,,,,,,,
1,Annotation Symbol,CG33503,,,,,,,,,,
2,Symbol,Cyp12d1-d,,,,,,,,,,
3,Name,Cyp12d1-d,,,,,,,,,,
4,,Adult Male,,,Adult Female,,,Male v. Female,,Larval,,


In [38]:
# Drop every row whose Tissue value is one of these labels (including the
# empty string) with a single mask instead of five successive filters.
# "FlyBase ID" is deliberately not in the list, so those rows are kept.
non_data_labels = ["Annotation Symbol", "Symbol", "Name", "Tissue", ""]
df = df[~df["Tissue"].isin(non_data_labels)]
df.head(10)

Unnamed: 0,Tissue,Adult Male FPKM,Adult Male SD,Adult Male Enrichment,Adult Female FPKM,Adult Female SD,Adult Female Enrichment,Male v. Female M/F,Male v. Female p value,Larval FPKM,Larval SD,Larval Enrichment
0,FlyBase ID,FBgn0053503,,,,,,,,,,
6,Whole body,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1
7,Head,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,-,-,-
8,Eye,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,-,-,-
9,Brain / CNS,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1
10,Thoracicoabdominal ganglion,0.0,0.0,-1.0,0.0,0.01,-1.0,1.0,n.s.,-,-,-
11,Crop,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,-,-,-
12,Midgut,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1
13,Hindgut,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1
14,Malpighian Tubules,0.01,0.02,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1


In [39]:
# Work on an explicit copy: `df` was produced by boolean filtering, and adding
# a column to such a frame through `.loc` can raise SettingWithCopyWarning.
df = df.copy()
# Only the "FlyBase ID" rows receive a value (taken from their second column,
# "Adult Male FPKM"); every other row gets NaN in the new "ID" column.
df.loc[df["Tissue"] == "FlyBase ID", "ID"] = df["Adult Male FPKM"]
df.head()

Unnamed: 0,Tissue,Adult Male FPKM,Adult Male SD,Adult Male Enrichment,Adult Female FPKM,Adult Female SD,Adult Female Enrichment,Male v. Female M/F,Male v. Female p value,Larval FPKM,Larval SD,Larval Enrichment,ID
0,FlyBase ID,FBgn0053503,,,,,,,,,,,FBgn0053503
6,Whole body,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1,
7,Head,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,-,-,-,
8,Eye,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,-,-,-,
9,Brain / CNS,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1,


In [40]:
# Forward-fill the ID column so each row carries the most recent non-null ID
# above it. `.ffill()` replaces `fillna(method='ffill')`, which is deprecated
# in recent pandas, and `assign` returns a new frame rather than setting a
# column through attribute access.
df = df.assign(ID=df["ID"].ffill())
df.head()

Unnamed: 0,Tissue,Adult Male FPKM,Adult Male SD,Adult Male Enrichment,Adult Female FPKM,Adult Female SD,Adult Female Enrichment,Male v. Female M/F,Male v. Female p value,Larval FPKM,Larval SD,Larval Enrichment,ID
0,FlyBase ID,FBgn0053503,,,,,,,,,,,FBgn0053503
6,Whole body,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1,FBgn0053503
7,Head,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,-,-,-,FBgn0053503
8,Eye,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,-,-,-,FBgn0053503
9,Brain / CNS,0.0,0.0,-1.0,0.0,0.0,-1.0,1.0,n.s.,0.0,0,-1,FBgn0053503


In [41]:
# Remove the rows labelled "FlyBase ID" from the frame.
is_id_row = df["Tissue"] == "FlyBase ID"
df = df[~is_id_row]
df.head()

Unnamed: 0,Tissue,Adult Male FPKM,Adult Male SD,Adult Male Enrichment,Adult Female FPKM,Adult Female SD,Adult Female Enrichment,Male v. Female M/F,Male v. Female p value,Larval FPKM,Larval SD,Larval Enrichment,ID
6,Whole body,0.0,0,-1,0.0,0.0,-1,1.0,n.s.,0.0,0,-1,FBgn0053503
7,Head,0.0,0,-1,0.0,0.0,-1,1.0,n.s.,-,-,-,FBgn0053503
8,Eye,0.0,0,-1,0.0,0.0,-1,1.0,n.s.,-,-,-,FBgn0053503
9,Brain / CNS,0.0,0,-1,0.0,0.0,-1,1.0,n.s.,0.0,0,-1,FBgn0053503
10,Thoracicoabdominal ganglion,0.0,0,-1,0.0,0.01,-1,1.0,n.s.,-,-,-,FBgn0053503
