In [1]:
# %load_ext autoreload
# %autoreload 2
import pymongo

import numpy as np
import pandas as pd

from util import *

In [2]:
# Database configuration: connection settings are named up front so they are
# easy to spot and change; the collection handles are used by the cells below.
MONGO_URI = "mongodb://localhost:27017/"
MONGO_DB_NAME = 'thesis'

client = pymongo.MongoClient(MONGO_URI)
database = client[MONGO_DB_NAME]
db_dumps, db_boards = database['dumps'], database['boards']

# Display configuration for data frames: show every column and every row, and
# never truncate cell contents.
# `None` is the supported "no limit" value for all three options; the former
# `-1` for max_colwidth was deprecated in pandas 1.0 and is rejected by
# pandas 2.x.
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

## Extracción de los parámetros de una placa

En el id de 96 bits de una placa están codificados, por orden:

Oblea, Lote, Coordenada X en la oblea y Coordenada Y en la oblea.

In [3]:
# Distinct board ids found in the dumps collection. Only `board_id` is
# projected, so the full dump payloads are not transferred. The ids are sorted
# so that every table derived from `boards_ids` has a reproducible row order
# (iterating a set gives no ordering guarantee).
boards_ids = sorted(set(
    doc['board_id'] for doc in db_dumps.find({}, {"_id": 0, "board_id": 1})
))
boards_ids_df = pd.DataFrame({'Board': boards_ids})

boards_params_cols = ['Board', 'Wafer', 'Lot', 'X', 'Y']

# Collect one record per board and build the frame in a single call:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
# NOTE(review): assumes extract_board_params returns one dict (or Series) per
# board keyed by the names in boards_params_cols -- confirm against util.
boards_params_records = [extract_board_params(board_id) for board_id in boards_ids]
boards_params_df = pd.DataFrame(boards_params_records, columns=boards_params_cols)

In [4]:
# Persist the board ids and the decoded board parameters as CSV (no index column).
for frame, csv_path in (
    (boards_ids_df, './csv/boards_ids.csv'),
    (boards_params_df, './csv/boards_params.csv'),
):
    frame.to_csv(csv_path, index=False)

## Comparación de las regiones en todas las placas

Para todas las placas, comparamos dos muestras de la misma región y calculamos cuántos bytes han variado.

In [5]:
# The list of memory regions is taken from the dumps of the first board only.
# Only `mem_pos` is projected so the dump payloads are not transferred just to
# enumerate the regions.
regions_in_memory = sorted(set(
    doc['mem_pos']
    for doc in db_dumps.find({"board_id": boards_ids[0]}, {"_id": 0, "mem_pos": 1})
))
regions_in_memory_df = pd.DataFrame({'Region': regions_in_memory})

diffs_regions_cols = ['Board', 'Region', 'Id_1', 'Id_2', 'Diff']

# Each board gets a fresh, empty frame to fill; the per-board results are then
# concatenated once. DataFrame.append was removed in pandas 2.0 and re-copied
# the accumulated frame on every iteration.
# NOTE(review): assumes compare_dumps_one_board returns a DataFrame with the
# columns in diffs_regions_cols -- confirm against util.
per_board_diffs = [
    compare_dumps_one_board(bid, pd.DataFrame(columns=diffs_regions_cols))
    for bid in boards_ids
]
diffs_regions_all_boards_df = pd.concat(per_board_diffs, ignore_index=True)

In [6]:
# Persist the per-board region differences as CSV (no index column).
diffs_regions_all_boards_csv = './csv/diffs_regions_all_boards.csv'
diffs_regions_all_boards_df.to_csv(diffs_regions_all_boards_csv, index=False)

## Promedio de las diferencias de las regiones en todas las placas

Creamos un promedio de los datos anteriores para tener una visión general.

In [7]:
# Per-region summary statistics of the intra-board differences computed above.
region_diffs_cols = ["Region", "Mean_Diff", "Median", "Std_Dev", "Num_Samples"]

# Build one record per region and create the frame once; DataFrame.append was
# removed in pandas 2.0 and copied the frame on every call.
region_diffs_records = []
for region in regions_in_memory:
    # All 'Diff' values measured for this region, across every board.
    region_diff_values = diffs_regions_all_boards_df.loc[
        diffs_regions_all_boards_df['Region'] == region, 'Diff'
    ]
    region_diffs_records.append({
        'Region': region,
        'Mean_Diff': region_diff_values.mean(),
        'Median': region_diff_values.median(),
        'Std_Dev': region_diff_values.std(),
        'Num_Samples': len(region_diff_values),
    })

region_diffs_df = pd.DataFrame(region_diffs_records, columns=region_diffs_cols)

In [8]:
# Persist the per-region summary as CSV (no index column).
region_diffs_summary_csv = './csv/diffs_regions_all_boards_summary.csv'
region_diffs_df.to_csv(region_diffs_summary_csv, index=False)

## Diferencias de muestras entre placas

Ahora comparamos muestras de una región de memoria en dos placas.
Hay que comprobar la misma zona en dos placas para ver la variación entre placas.

In [9]:
# `mp` is no longer used in this cell (see the note below); the import is
# retained so any other cell relying on the name keeps working.
import multiprocessing as mp
# Explicit import instead of relying on `from util import *` to leak the name.
from itertools import combinations

# Inter-board comparison: for every memory region and every unordered pair of
# boards, the i-th dump of one board is compared with the i-th dump of the
# other, and the difference is recorded.
#
# No multiprocessing pool is created here: the loop below runs serially, and a
# pool that is never used, closed or joined only leaks worker processes.
diffs_regions_pair_boards_cols = ['Region', 'Board_1', 'Board_2', 'Id_1', 'Id_2', 'Diff']

board_pairs = list(combinations(boards_ids, 2))

pair_diff_records = []
for region in regions_in_memory:
    # Query each board's dumps for this region once, rather than once per
    # pair the board takes part in. All boards' dumps for one region are held
    # in memory at the same time.
    # NOTE(review): assumes calculate_diff does not mutate the dump documents
    # it receives -- confirm against util.
    dumps_by_board = {
        board_id: list(db_dumps.find({'board_id': board_id, 'mem_pos': region}))
        for board_id in boards_ids
    }

    for board_1, board_2 in board_pairs:
        # zip stops at the shorter list, so only samples present on both
        # boards are compared.
        for dump_1, dump_2 in zip(dumps_by_board[board_1], dumps_by_board[board_2]):
            pair_diff_records.append({
                'Region': region,
                'Board_1': board_1,
                'Board_2': board_2,
                'Id_1': dump_1['_id'],
                'Id_2': dump_2['_id'],
                'Diff': calculate_diff(dump_1, dump_2),
            })

# Build the frame once; DataFrame.append was removed in pandas 2.0 and copied
# the accumulated frame for every single sample.
diffs_regions_pair_boards_df = pd.DataFrame(
    pair_diff_records, columns=diffs_regions_pair_boards_cols
)

NameError: name 'all_regions_pair_boards_cols' is not defined

In [None]:
# Persist the inter-board differences as CSV (no index column).
diffs_regions_pair_boards_csv = './csv/diffs_regions_pair_boards.csv'
diffs_regions_pair_boards_df.to_csv(diffs_regions_pair_boards_csv, index=False)

In [None]:
# Per-region summary statistics of the inter-board differences.
diffs_regions_pair_boards_summary_cols = ['Region', 'Mean_Diff', 'Median', 'Std_Dev', 'Num_Samples']

# Build one record per region and create the frame once. The frame uses the
# column list defined just above (the previous code referenced an undefined
# name and raised NameError); DataFrame.append was removed in pandas 2.0.
pair_summary_records = []
for region in regions_in_memory:
    # All 'Diff' values measured for this region, across every pair of boards.
    region_diff_values = diffs_regions_pair_boards_df.loc[
        diffs_regions_pair_boards_df['Region'] == region, 'Diff'
    ]
    pair_summary_records.append({
        'Region': region,
        'Mean_Diff': region_diff_values.mean(),
        'Median': region_diff_values.median(),
        'Std_Dev': region_diff_values.std(),
        'Num_Samples': len(region_diff_values),
    })

diffs_regions_pair_boards_summary_df = pd.DataFrame(
    pair_summary_records, columns=diffs_regions_pair_boards_summary_cols
)

In [None]:
# Persist the inter-board summary as CSV (no index column), then show it as
# the cell's output.
pair_summary_csv = './csv/diffs_regions_pair_boards_summary.csv'
diffs_regions_pair_boards_summary_df.to_csv(pair_summary_csv, index=False)
diffs_regions_pair_boards_summary_df