In [16]:
import pymongo

import numpy as np
import pandas as pd

from util import *

In [17]:
# Database handles: local MongoDB server -> 'thesis' database -> the two
# collections this notebook works with.
client = pymongo.MongoClient("mongodb://localhost:27017/")
database = client.get_database('thesis')
db_dumps = database.get_collection('dumps')
db_boards = database.get_collection('boards')

# Configuration of data frames: show every column and row, and never
# truncate cell contents when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# `None` means "no limit". The legacy value -1 has been deprecated since
# pandas 1.0 and is rejected with a ValueError by pandas 2.x.
pd.set_option('display.max_colwidth', None)

In [18]:
# Extract useful data from the board id
extracted_ids_cols = ['Board', 'Wafer', 'Lot', 'X', 'Y']

# Distinct board ids present in the dumps collection. Sorted so that the
# order (and therefore the CSV row order and `boards_ids[0]`) is the same on
# every run; a bare set has no stable iteration order.
# Assumes the ids are mutually comparable (e.g. all strings) -- TODO confirm.
boards_ids = sorted({doc['board_id'] for doc in db_dumps.find({}, {"_id": 0, "board_id": 1})})

# Collect the per-board results and combine them once. Growing a frame with
# `DataFrame.append` copies it on every iteration, and the method was removed
# in pandas 2.0.
board_frames = []
for board_id in boards_ids:
    data = extract_board_params(board_id)
    # NOTE(review): the return type of extract_board_params is not visible
    # from this notebook. Accept everything `DataFrame.append` accepted:
    # a DataFrame, a list of records, or a single dict/Series row.
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data if isinstance(data, list) else [data])
    board_frames.append(data)

# `pd.concat` refuses an empty list, so fall back to an empty frame.
combined_ids = pd.concat(board_frames, ignore_index=True) if board_frames else pd.DataFrame()
# Declared columns first, followed by any additional ones the helper returned.
extra_id_cols = [col for col in combined_ids.columns if col not in extracted_ids_cols]
extracted_ids_df = combined_ids.reindex(columns=extracted_ids_cols + extra_id_cols)

In [19]:
# Persist the extracted board parameters; the row index is not written.
extracted_ids_df.to_csv(
    './extracted_ids.csv',
    index=False,
)

In [21]:
# Loop over all the regions and calculate the diff in their data
# from all of the boards
# NOTE(review): the original comment also said "filter out the ones that are
# too high or too low"; no filtering happens in this cell -- presumably it is
# done inside compare_dumps_one_board. Confirm.
diff_cols = ['Board', 'Region', 'Id_1', 'Id_2', 'Diff']

# Memory regions are read from the first board's dumps only
# (assumes every board exposes the same regions -- TODO confirm).
# Only `mem_pos` is projected so the full dump documents are not transferred.
if boards_ids:
    region_docs = db_dumps.find({"board_id": boards_ids[0]}, {"_id": 0, "mem_pos": 1})
    regions_in_memory = sorted({doc['mem_pos'] for doc in region_docs})
else:
    # No boards in the collection: nothing to compare.
    regions_in_memory = []

# Compare all samples in all boards. Results are gathered in a list and
# concatenated once: `DataFrame.append` in a loop copies the whole frame on
# each iteration and was removed in pandas 2.0.
board_diffs = []
for bid in boards_ids:
    # Each call gets its own empty frame with the expected columns.
    df = pd.DataFrame(columns=diff_cols)
    data = compare_dumps_one_board(bid, df)
    # NOTE(review): the return type of compare_dumps_one_board is not visible
    # from this notebook. Accept everything `DataFrame.append` accepted:
    # a DataFrame, a list of records, or a single dict/Series row.
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data if isinstance(data, list) else [data])
    board_diffs.append(data)

# `pd.concat` refuses an empty list, so fall back to an empty frame.
combined_diffs = pd.concat(board_diffs, ignore_index=True) if board_diffs else pd.DataFrame()
# Declared columns first, followed by any additional ones the helper returned.
extra_diff_cols = [col for col in combined_diffs.columns if col not in diff_cols]
final_df = combined_diffs.reindex(columns=diff_cols + extra_diff_cols)

In [23]:
# Persist the per-board comparison results; the row index is not written.
final_df.to_csv(
    './all_boards_individual.csv',
    index=False,
)

In [None]:
# Column layout of the per-region summary table and an empty frame using it.
# NOTE(review): this cell has no execution count and only repeats names that
# the following cell defines itself -- it looks like a leftover and can
# probably be deleted.
region_diffs_names = [
    "Region",
    "Mean_Diff",
    "Median",
    "Std_Dev",
    "Num_Samples",
]
region_diffs_df = pd.DataFrame(columns=region_diffs_names)

In [24]:
# Calculate the mean diff and std_dev for all regions combining all data
region_diffs_names = ["Region", "Mean_Diff", "Median", "Std_Dev", "Num_Samples"]

# One summary record per memory region, pooled over every board in
# `final_df`. The records are collected in a list and turned into a frame
# once: `DataFrame.append` in a loop copies the frame on each iteration and
# was removed in pandas 2.0.
region_records = []
for region in regions_in_memory:
    # All `Diff` values measured for this region, across all boards.
    region_diff = final_df.loc[final_df['Region'] == region, 'Diff']

    region_records.append({
        'Region': region,
        'Mean_Diff': region_diff.mean(),
        'Median': region_diff.median(),
        'Std_Dev': region_diff.std(),  # pandas default: sample std (ddof=1)
        'Num_Samples': len(region_diff),
    })

# `columns=` fixes the column order and keeps the header even when
# `regions_in_memory` is empty.
region_diffs_df = pd.DataFrame(region_records, columns=region_diffs_names)

In [25]:
# Persist the per-region summary statistics; the row index is not written.
region_diffs_df.to_csv(
    './all_regions_mean.csv',
    index=False,
)