In [1]:
%run -i ../src/utilities.py

In [12]:
# Root folder that holds every time-series dataset used below.
data_dir = '../time-series'
# Sub-folders of the root: lifeline-indexed outputs and the original inputs.
ll_data_dir = f'{data_dir}/lifeline'
org_data_dir = f'{data_dir}/original'

### Separate proteomics file into two replicate files

In [13]:
# Open the combined (replicates 1 and 2) proteomics workbook once so that
# individual sheets can be pulled from the same handle.
wt12_prot_xls = pd.ExcelFile(
    f'{org_data_dir}/wt1-2_proteomics/mc-e18-04-0255-s13.xlsx'
)
# Load the 'heavy-light_data' sheet; the commented line below selects the
# 'max_scaled_data' sheet instead.
wt12_prot_df = pd.read_excel(wt12_prot_xls, sheet_name='heavy-light_data')
# wt12_prot_df = pd.read_excel(wt12_prot_xls, sheet_name='max_scaled_data')

In [14]:
# Split the combined proteomics table into one CSV per replicate, each written
# to its own wt<N>_proteomics folder under org_data_dir.
for rep in ['r1', 'r2']:
    # Keep only the rows whose 'time_points' label mentions this replicate.
    df = wt12_prot_df[wt12_prot_df['time_points'].str.contains(rep)]
    dirname = f'wt{rep[-1]}_proteomics'
    # exist_ok=True keeps the cell re-runnable once the folder exists.
    os.makedirs(f'{org_data_dir}/{dirname}', exist_ok=True)
    df.to_csv(f'{org_data_dir}/{dirname}/wt{rep[-1]}_proteomics.csv', index=False)

# Move each replicate's readme out of the combined folder and into the
# matching per-replicate folder.
for readme in os.listdir(f'{org_data_dir}/wt1-2_proteomics'):
    if 'readme' not in readme.lower():
        continue
    if 'rep1' in readme:
        os.rename(f'{org_data_dir}/wt1-2_proteomics/{readme}', f'{org_data_dir}/wt1_proteomics/{readme}')
    # The test must be a membership check on the file name; a bare non-empty
    # string literal is always truthy and would send every remaining readme
    # to the replicate-2 folder.
    elif 'rep2' in readme:
        os.rename(f'{org_data_dir}/wt1-2_proteomics/{readme}', f'{org_data_dir}/wt2_proteomics/{readme}')

### Add lifeline points to datasets

In [15]:
# Write a lifeline-indexed copy of every dataset found under org_data_dir.
# For each dataset folder: load its data table, read the budding index from
# the folder's readme via grab_budding_index, then replace the table's
# 'Clock' column labels with the matching 'Lifeline' values.
os.makedirs(ll_data_dir, exist_ok=True)  # to_csv does not create missing folders

# The loop variable is deliberately NOT called data_dir, so the notebook-level
# data_dir root path is not overwritten by a dataset folder name.
for dataset in os.listdir(org_data_dir):
    dataset_path = f'{org_data_dir}/{dataset}'
    # Skip the combined proteomics folder and anything that is not a folder
    # (e.g. .DS_Store), which os.listdir below could not descend into.
    if dataset in ('.DS_Store', 'wt1-2_proteomics') or not os.path.isdir(dataset_path):
        continue

    data_df = pd.DataFrame()
    budding_index = pd.DataFrame()
    for file in os.listdir(dataset_path):
        file_path = f'{dataset_path}/{file}'

        # Readme files carry the budding index for the dataset.
        if 'readme' in file.lower():
            if dataset == 'crypto_rnaseq':
                # This readme is read with an explicit ISO-8859-1 encoding.
                budding_index = grab_budding_index(file_path, encoding='ISO-8859-1')
            else:
                budding_index = grab_budding_index(file_path)
            continue

        if file in ('.DS_Store', 'rnaseq_proc'):
            continue

        # Every other entry is treated as the data table; the parsing options
        # depend on which dataset it belongs to.
        if dataset in ('wt1_microarray', 'wt2_microarray'):
            # Tab-separated, with the first 28 lines skipped.
            data_df = pd.read_csv(file_path, index_col=0, skiprows=28, sep='\t')
            data_df = data_df.set_index('symbol')
            data_df = data_df.drop(labels=['sgd_id', 'sys_name'], axis=1)
        elif dataset in ('wt1_proteomics', 'wt2_proteomics'):
            data_df = pd.read_csv(file_path, index_col=0, comment='#')
            # Drop the last three characters of every row label
            # (presumably a replicate suffix -- TODO confirm).
            data_df.index = data_df.index.str[:-3]
        elif dataset == 'wt1_rnaseq':
            data_df = pd.read_csv(file_path, index_col=0, comment='#')
        elif dataset == 'crypto_rnaseq':
            data_df = pd.read_csv(file_path, index_col=0, comment='#', sep='\t')

    # Transpose so the original column labels become a 'Clock' column, attach
    # the budding index on it, and re-key the rows by 'Lifeline'.
    lifeline_df = data_df.T.reset_index()
    lifeline_df = lifeline_df.rename(columns={'index': 'Clock'})
    lifeline_df = lifeline_df.merge(budding_index, on='Clock')
    lifeline_df = lifeline_df.set_index('Lifeline')
    # Remove the remaining budding-index columns so only the data is left.
    lifeline_df = lifeline_df.drop(['Total', 'Budded', 'Clock'], axis=1)
    # Transpose back: the columns are now the lifeline points.
    lifeline_df = lifeline_df.T
    lifeline_df.index.name = 'Lifeline'
    lifeline_df.to_csv(f'{ll_data_dir}/{dataset}_lifeline.txt', sep='\t')