In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import os
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm import tqdm
# Global plotting defaults for the notebook: colour palette, then matplotlib
# rc overrides (one per line so individual settings are easy to diff), then
# the seaborn scaling context.
sns.set_palette('Dark2')
sns.set_style({
    # axes
    'axes.axisbelow': True,
    'axes.edgecolor': '.15',
    'axes.facecolor': 'white',
    'axes.grid': True,
    'axes.labelcolor': '.15',
    'axes.linewidth': 1.25,
    # figure / fonts
    'figure.facecolor': 'white',
    'font.family': ['sans-serif'],
    # grid
    'grid.color': '.15',
    'grid.linestyle': ':',
    'grid.alpha': 0.5,
    # images, legends, lines
    'image.cmap': 'Greys',
    'legend.frameon': False,
    'legend.numpoints': 1,
    'legend.scatterpoints': 1,
    'lines.solid_capstyle': 'round',
    # hide the right and top spines
    'axes.spines.right': False,
    'axes.spines.top': False,
    # text and ticks
    'text.color': '.15',
    'xtick.top': False,
    'ytick.right': False,
    'xtick.color': '.15',
    'xtick.direction': 'out',
    'xtick.major.size': 6,
    'xtick.minor.size': 3,
    'ytick.color': '.15',
    'ytick.direction': 'out',
    'ytick.major.size': 6,
    'ytick.minor.size': 3,
})
sns.set_context('talk')

In [2]:
from snapanalysis.config import OUTPUT_DIRECTORY as OUTPUT_DIRECTORY_MAIN

# All files written by this notebook go into a 'metadata' sub-directory of the
# project-wide output directory.
OUTPUT_DIRECTORY = os.path.join(OUTPUT_DIRECTORY_MAIN, 'metadata')
# exist_ok=True creates the directory (and any missing parents) in one call and
# avoids the check-then-create race of testing os.path.isdir first.
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)

In [3]:
from snapanalysis.preprocessing.raw.identifier_conversion import PULLDOWN_ID_MAP

In [4]:
from snapanalysis.preprocessing.pulldown_metadata import OUTPUT_FILE as PD_META_OUT
from snapanalysis.preprocessing.pulldown_metadata import _INPUT_SPREADSHEET as EXCEL_INPUT

# Open the metadata store once, read-only, and read both tables through that
# open handle instead of having pd.read_hdf re-open the same file by path
# while the store is already open.
with pd.HDFStore(PD_META_OUT, 'r') as store:
    dates = pd.read_hdf(store, '/meta/dates')
    dna_type = pd.read_hdf(store, '/meta/dna')
    # Print every key available in the store, one per line.
    print('\n'.join(store.keys()))

/meta/color_palette
/meta/coloured_predictors
/meta/coloured_predictors_directionalised
/meta/coloured_predictors_directionalised_with_nulls
/meta/coloured_predictors_with_nulls
/meta/dates
/meta/dna
/meta/names_and_types
/meta/octamers
/meta/octamers_categorical
/meta/predictors
/meta/predictors_categorical
/meta/predictors_categorical_directionalised
/meta/predictors_categorical_directionalised_long
/meta/predictors_web
/meta/predictors_with_redundancy


In [5]:
# Load the pull-down spreadsheet, discard rows that have no 'PD code', strip
# surrounding whitespace from the headers and then give the columns shorter
# names.
df = (
    pd.read_excel(EXCEL_INPUT)
    .dropna(subset=['PD code'])
    .rename(columns=lambda label: label.strip())
    .rename(
        columns={
            'PD code': 'Nhuongs_code',
            'Forward Assembled on': 'Forward assembly date',
            # Spelling and double space are kept as-is: the key has to match
            # the spreadsheet header for the rename to apply.
            'Forward Pulldowed  on': 'Forward PD date',
            'Reverse Assembled on': 'Reverse assembly date',
            'Reverse Pulldowned on': 'Reverse PD date',
        }
    )
)

# Look every code up in PULLDOWN_ID_MAP; lookups that come back missing are
# labelled 'Not used'.
df['Pull-Down ID'] = (
    df['Nhuongs_code']
    .apply(PULLDOWN_ID_MAP.get)
    .fillna('Not used')
)

In [6]:
# Display the column labels of df after the header clean-up and rename.
df.columns

Index(['Nhuongs_code', 'Octamer Name', 'Forward assembly date',
       'Forward PD date', 'Reverse assembly date', 'Reverse PD date',
       'DNA type', 'Comments', 'Pull-Down ID'],
      dtype='object')

In [7]:
# Put the columns in their final order, with 'Pull-Down ID' directly after the
# original code column; .copy() makes the result an independent frame.
df = df.loc[:, [
    'Nhuongs_code',
    'Pull-Down ID',
    'Octamer Name',
    'Forward assembly date',
    'Forward PD date',
    'Reverse assembly date',
    'Reverse PD date',
    'DNA type',
    'Comments',
]].copy()

In [8]:
# Write the relabelled pull-down table into the notebook's output directory,
# leaving out the row index.
df.to_excel(
    os.path.join(OUTPUT_DIRECTORY, 'PDs_with_reassigned_labels.xlsx'),
    index=False,
)