# Clean up network history

Similar to https://colab.research.google.com/drive/1DdYPOcXxg7CmRFtpjHdmFUtPj7t2GGSq#scrollTo=bGvwRn-nyPVS
    
We are going to convert network history from files to json in the report

In [1]:
import os
import pandas as pd
import numpy as np
import pickle
import json

import sys
sys.path.append('../../')


from functools import singledispatch

@singledispatch
def to_serializable(val):
    """Used by default."""
    return str(val)

@to_serializable.register(np.float32)
def ts_float32(val):
    """Used if *val* is an instance of numpy.float32."""
    return np.float64(val)


In [5]:

DRIVE_DIR = '../../'
REPORT_DIR = f'{DRIVE_DIR}/reports'


def convert_history(reports: list, debug: bool = True) -> list:

  convert_history = {}

  for report_basename in reports:
    report_file = f'{DRIVE_DIR}/reports/{report_basename}'
    report_file_bak = f'{report_file}.pre-network-history'

    converted_files = []

    print(f"\n\nReading {os.path.basename(report_file)}")
    if "run_classifier" in report_basename:
        df = pd.read_csv(report_file)
    else:
        df = pd.read_csv(report_file, quotechar="'")
    if not debug:
      # backing up old file
      print(f'Backing up to {os.path.basename(report_file_bak)}')
      df.to_csv(report_file_bak, index = False, quotechar="'")

    if "network_history_file" in df.columns:
        for idx, row in df.iterrows():

          if "network_history_file" in row.index and isinstance(row.network_history_file, str) and len(row.network_history_file) > 0:
            if "network_history" not in row.index or row.network_history is np.NaN or len(row.network_history) == 0:
              print(f"converting network history {row.network_history_file}...")
              history_file = f'{DRIVE_DIR}/reports/{os.path.basename(row.network_history_file)}'

              if os.path.exists(history_file):
                print(f"loading {os.path.basename(history_file)}")
                with open(history_file, 'rb') as file:
                  network_history = pickle.load(file)
                  # print(network_history)


                converted_files.append(row.network_history_file)
                df.loc[idx, "network_history"] = json.dumps(network_history, default=to_serializable)
                df.loc[idx, "network_history_file"] = np.NaN
              else:
                print(f'Cannot locate: {history_file}')
                df.loc[idx, "network_history_file"] = np.NaN

            else:
              print("network_history already exists")

        convert_history[report_basename] = converted_files

        if "network_history" in df.columns:
          print(df[["network_history_file", "network_history"]])
        else:
          print(f'Nothing to convert {report_basename}')
        # TODO: overwrite file
        if not debug:
          print(f'Saving {os.path.basename(report_file)}')
          df.to_csv(f'{report_file}', index = False, quotechar="'")

  return convert_history



reports = [file for file in os.listdir(f'{REPORT_DIR}') 
           if file.endswith("report.csv") and not "feature_generator" in file]

history_dict = convert_history(reports, debug = False)




Reading 2020-05-dl_prototype-report.csv
Backing up to 2020-05-dl_prototype-report.csv.pre-network-history
   network_history_file                                    network_history
0                   NaN  {"loss": [1.6073917150497437, 1.57548284530639...
Saving 2020-05-dl_prototype-report.csv


Reading 2019-11-01-run_classifiers-50k-report.csv
Backing up to 2019-11-01-run_classifiers-50k-report.csv.pre-network-history


Reading LSTMB128_dr2_rdr2_batch128_lr001-glove_with_stop_nonlemmatized-dl_prototype-report.csv
Backing up to LSTMB128_dr2_rdr2_batch128_lr001-glove_with_stop_nonlemmatized-dl_prototype-report.csv.pre-network-history
   network_history_file                                    network_history
0                   NaN  {"loss": [1.3436235189437866, 1.23250436782836...
Saving LSTMB128_dr2_rdr2_batch128_lr001-glove_with_stop_nonlemmatized-dl_prototype-report.csv


Reading LSTMB128_dr0_rdr2_batch32_lr01-glove_with_stop_nonlemmatized-dl_prototype-report.csv
Backing up to LSTM

In [6]:
history_df = pd.DataFrame()


for key, value in history_dict.items():
  # print(f'{key} {value}')
  if len(value) > 0:
    for report in value:
      file = f'{REPORT_DIR}/{os.path.basename(report)}'
      if os.path.exists(file):
        print(f'Deleting {file}')
        os.remove(file)
      history_df = history_df.append({"report": key, "network_history_file": report}, ignore_index = True)

history_df.to_csv(f'{REPORT_DIR}/2-column-deleted-network-history-report.csv', index = False)
history_df

Deleting ../..//reports/biGRU16-1x16-glove-sampling_none-199134-100-star_rating-history.pkl
Deleting ../..//reports/biGRU16-1x16-glove-sampling_none-497835-100-star_rating-history.pkl
Deleting ../..//reports/biGRU16-1x16-glove-sampling_none-995688-100-star_rating-history.pkl
Deleting ../..//reports/biGRU16-1x16-glove-sampling_none-2000000-100-star_rating-history.pkl
Deleting ../..//reports/LSTMB16_15-1x16-glove-sampling_none-673881-100-star_rating-history.pkl
Deleting ../..//reports/LSTMB16_15-1x16-glove-sampling_none-337040-100-star_rating-history.pkl
Deleting ../..//reports/LSTMB16_15-1x16-glove-sampling_none-134678-100-star_rating-history.pkl
Deleting ../..//reports/LSTMB16_15-1x16-glove-sampling_none-1352517-100-star_rating-history.pkl
Deleting ../..//reports/LSTMB16-1x16-glove-sampling_none-995688-100-star_rating-history.pkl
Deleting ../..//reports/LSTMB16_145-1x16-glove-sampling_none-839811-100-star_rating-history.pkl
Deleting ../..//reports/LSTMB16_1345-1x16-glove-sampling_none-

Unnamed: 0,network_history_file,report
0,drive/My Drive/Springboard/capstone/models/biG...,glove_embedding-dl_prototype-report.csv
1,drive/My Drive/Springboard/capstone/models/biG...,glove_embedding-dl_prototype-report.csv
2,drive/My Drive/Springboard/capstone/models/biG...,glove_embedding-dl_prototype-report.csv
3,drive/My Drive/Springboard/capstone/models/biG...,glove_embedding-dl_prototype-report.csv
4,drive/My Drive/Springboard/capstone/reports/LS...,2-column-converted-network-history-report.csv
5,drive/My Drive/Springboard/capstone/reports/LS...,2-column-converted-network-history-report.csv
6,drive/My Drive/Springboard/capstone/reports/LS...,2-column-converted-network-history-report.csv
7,drive/My Drive/Springboard/capstone/reports/LS...,2-column-converted-network-history-report.csv
8,drive/My Drive/Springboard/capstone/reports/LS...,2-column-converted-network-history-report.csv
9,drive/My Drive/Springboard/capstone/reports/LS...,2-column-converted-network-history-report.csv
