In [1]:
# %load_ext autoreload
# %autoreload 2
import pymongo

import numpy as np
import pandas as pd

from util import *

In [2]:
# Database configuration: connect to the local MongoDB server and keep handles
# to the collections used throughout the notebook.
client = pymongo.MongoClient("mongodb://localhost:27017/")
database = client['thesis']
db_dumps = database['dumps']
db_boards = database['boards']

# DataFrame display configuration: never truncate columns, rows or cell
# contents when a frame is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# None is the supported "no limit" value; the old -1 sentinel is deprecated.
pd.set_option('display.max_colwidth', None)

## Extracción de los parámetros de una placa

En el id de 96 bits de una placa están codificados, por orden:

Oblea, Lote, Coordenada X en la oblea y Coordenada Y en la oblea.

In [3]:
# Every distinct board id present in the dumps collection. Sorted so the order
# (and anything derived from it, such as boards_ids[0]) is the same on every
# kernel restart instead of depending on set iteration order.
boards_ids = sorted(db_dumps.distinct('board_id'))
boards_ids_df = pd.DataFrame({'Board': boards_ids})

# Number of distinct boards; derived locally so an empty collection yields 0
# instead of an IndexError on an empty aggregation result.
num_boards = len(boards_ids)

# A board with at least one dump at address 0x20005a00 is tagged Type 64, any
# other board Type 32. One query fetches that set instead of one count per board.
boards_type_64 = set(db_dumps.distinct('board_id', {'mem_pos': '0x20005a00'}))

boards_params_cols = ['Board', 'Type', 'Wafer', 'Lot', 'X', 'Y']

# Collect one record per board and build the frame once: growing a DataFrame
# row by row is quadratic, and DataFrame.append was removed in pandas 2.0.
boards_params_records = []
for board_id in boards_ids:
    # Copy into a fresh dict so adding 'Type' never mutates the helper's result.
    data = dict(extract_board_params(board_id))
    data['Type'] = 64 if board_id in boards_type_64 else 32
    boards_params_records.append(data)

boards_params_df = pd.DataFrame(boards_params_records)
# Expected columns first (created empty when absent), then any extra keys the
# helper returned, mirroring what appending to the templated frame produced.
extra_cols = [col for col in boards_params_df.columns if col not in boards_params_cols]
boards_params_df = boards_params_df.reindex(columns=boards_params_cols + extra_cols)

In [4]:
# Sanity check: how many distinct boards were found in the dumps.
total_boards = len(boards_ids)
print(total_boards)

206


In [5]:
# Persist the board id list and the board parameters table as CSV files.
csv_exports = (
    (boards_ids_df, './csv/boards_ids.csv'),
    (boards_params_df, './csv/boards_params.csv'),
)
for frame, csv_path in csv_exports:
    frame.to_csv(csv_path, index=False)

# Unión de todos los datos en una sola colección

Aquí se combinan y se modifican todos los datos de la colección `dumps` para poder ser exportados y trabajados debidamente.

En esta colección se combinan los datos de `boards_params.csv` con los datos de la colección `dumps`. Sin embargo, se han modificado los bytes de cada muestra para guardarse como bits individuales.

In [10]:
def _first_if_sequence(value):
    """Return value[0] when value is a list or tuple, otherwise value unchanged.

    The calibration fields of a dump are accepted both as a bare value and
    wrapped in a sequence; this normalises the two forms.
    """
    if isinstance(value, (list, tuple)):
        return value[0]
    return value


# Target collection: one document per dump, enriched with the board parameters
# and with the raw bytes expanded into individual bits.
db_data = database['data']

dumps_list = list(db_dumps.find())

# A board with at least one dump at address 0x20005a00 is tagged Type 64, any
# other board Type 32. Fetched once instead of one count query per dump.
boards_type_64 = set(db_dumps.distinct('board_id', {'mem_pos': '0x20005a00'}))

# (_id, exception) for every dump whose insert failed for a reason other than
# already being present in the target collection.
failed_inserts = []

for dump in dumps_list:
    final_data = {'_id': dump['_id']}

    # Board parameters extracted from the board id.
    final_data.update(extract_board_params(dump['board_id']))
    final_data['Type'] = 64 if dump['board_id'] in boards_type_64 else 32

    final_data['Mem_pos'] = dump['mem_pos']
    final_data['Temp'] = dump['temp']
    final_data['Vdd'] = dump['vdd']

    final_data['Temp_cal_30'] = _first_if_sequence(dump['temp_cal_30'])
    final_data['Temp_cal_110'] = _first_if_sequence(dump['temp_cal_110'])
    final_data['Vrefint_cal'] = _first_if_sequence(dump['vrefint_cal'])

    final_data['Timestamp'] = dump['timestamp']

    # Expand every byte into its bits (zero-padded to 8, most significant first).
    final_data['Data'] = [int(bit) for byte in dump['data'] for bit in f'{byte:08b}']

    try:
        db_data.insert_one(final_data)
    except pymongo.errors.DuplicateKeyError:
        # Same _id already stored (e.g. by an earlier partial run): skip it so
        # the cell can simply be re-run to resume.
        continue
    except pymongo.errors.PyMongoError as exc:
        # Stay best-effort, but record the failure instead of hiding it.
        failed_inserts.append((dump['_id'], exc))

if failed_inserts:
    print(f'{len(failed_inserts)} dumps could not be inserted; inspect failed_inserts')

# Extracción de la base de datos

In [None]:
# Empty frame with the column layout for the exported database.
# NOTE(review): the trailing '' column name looks like an unfinished placeholder; confirm the intended column.
full_db_cols = ['Board', 'Type', 'Wafer', 'Lot', 'X', 'Y', '']
full_db_df = pd.DataFrame(columns=full_db_cols)

## Comparación de las regiones en todas las placas

Para todas las placas, comparamos dos muestras de la misma región y calculamos cuántos bytes han variado.

In [None]:
# Every memory region present in the dumps, across all boards. Reading them
# from boards_ids[0] alone made the list depend on which board happened to be
# first, dropped regions that board lacks, and raised when there are no boards.
regions_in_memory = sorted(db_dumps.distinct('mem_pos'))
regions_in_memory_df = pd.DataFrame({'Region': regions_in_memory})

diffs_regions_cols = ['Board', 'Region', 'Id_1', 'Id_2', 'Diff']

# Gather the per-board results and concatenate once: appending inside the loop
# is quadratic, and DataFrame.append was removed in pandas 2.0.
per_board_diffs = []
for bid in boards_ids:
    df = pd.DataFrame(columns=diffs_regions_cols)
    # NOTE(review): presumably returns a DataFrame with the columns above; confirm against util.
    data = compare_dumps_one_board(bid, df)
    if not isinstance(data, pd.DataFrame):
        # Accept the other row shapes append() took (dict, Series, list of them).
        data = pd.DataFrame(data if isinstance(data, list) else [data])
    if len(data):
        # Skip empty results so they cannot influence the combined dtypes.
        per_board_diffs.append(data)

if per_board_diffs:
    diffs_regions_all_boards_df = pd.concat(per_board_diffs, ignore_index=True)
else:
    diffs_regions_all_boards_df = pd.DataFrame(columns=diffs_regions_cols)

In [None]:
# Persist the per-board region differences as CSV.
diffs_regions_all_boards_df.to_csv(
    path_or_buf='./csv/diffs_regions_all_boards.csv',
    index=False,
)

## Promedio de las diferencias de las regiones en todas las placas

Creamos un promedio de los datos anteriores para tener una visión general.

In [None]:
region_diffs_cols = ["Region", "Mean_Diff", "Median", "Std_Dev", "Num_Samples"]

# One summary record per region, built into a frame in a single step: appending
# row by row is quadratic, and DataFrame.append was removed in pandas 2.0.
region_diffs_records = []
for region in regions_in_memory:
    # 'Diff' values of every within-board comparison for this region.
    region_diff = diffs_regions_all_boards_df.loc[
        diffs_regions_all_boards_df['Region'] == region, 'Diff'
    ]
    region_diffs_records.append({
        'Region': region,
        'Mean_Diff': region_diff.mean(),
        'Median': region_diff.median(),
        'Std_Dev': region_diff.std(),
        'Num_Samples': len(region_diff),
    })

# Regions without samples still get a row (NaN statistics, 0 samples).
region_diffs_df = pd.DataFrame(region_diffs_records, columns=region_diffs_cols)

In [None]:
# Persist the per-region summary of within-board differences as CSV.
region_diffs_df.to_csv(
    path_or_buf='./csv/diffs_regions_all_boards_summary.csv',
    index=False,
)

## Diferencias de muestras entre placas

Ahora comparamos muestras de una región de memoria en dos placas.
Hay que comprobar la misma zona en dos placas para ver la variación entre placas.

In [None]:
# multiprocessing is no longer used in this cell: the worker pool that was
# created here was never given work nor closed, so it only leaked processes.
# The import is kept in case another cell relies on the `mp` name.
import multiprocessing as mp
# Explicit import so the cell does not rely on `from util import *` for this name.
from itertools import combinations

diffs_regions_pair_boards_cols = ['Region', 'Board_1', 'Board_2', 'Id_1', 'Id_2', 'Diff']

board_pairs = list(combinations(boards_ids, 2))

# Rows are collected in a plain list and turned into a frame once at the end:
# appending to a DataFrame per row is quadratic, and DataFrame.append was
# removed in pandas 2.0.
pair_records = []

for region in regions_in_memory:
    # Fetch each board's dumps for this region once and reuse them for every
    # pair, instead of re-querying both boards for each of the pairs.
    # NOTE(review): assumes calculate_diff does not modify the dumps it receives; confirm against util.
    dumps_by_board = {
        board: list(db_dumps.find({'board_id': board, 'mem_pos': region}))
        for board in boards_ids
    }

    for (board_1, board_2) in board_pairs:
        # zip stops at the shorter list, i.e. compares min(len_1, len_2)
        # samples position by position.
        for dump_1, dump_2 in zip(dumps_by_board[board_1], dumps_by_board[board_2]):
            pair_records.append({
                'Region': region,
                'Board_1': board_1,
                'Board_2': board_2,
                'Id_1': dump_1['_id'],
                'Id_2': dump_2['_id'],
                'Diff': calculate_diff(dump_1, dump_2),
            })

diffs_regions_pair_boards_df = pd.DataFrame(pair_records, columns=diffs_regions_pair_boards_cols)

In [None]:
# Persist the board-to-board region differences as CSV.
diffs_regions_pair_boards_df.to_csv(
    path_or_buf='./csv/diffs_regions_pair_boards.csv',
    index=False,
)

In [None]:
diffs_regions_pair_boards_summary_cols = ['Region', 'Mean_Diff', 'Median', 'Std_Dev', 'Num_Samples']

# One summary record per region, built into a frame in a single step: appending
# row by row is quadratic, and DataFrame.append was removed in pandas 2.0.
pair_summary_records = []
for region in regions_in_memory:
    # 'Diff' values of every board-to-board comparison for this region.
    region_diff = diffs_regions_pair_boards_df.loc[
        diffs_regions_pair_boards_df['Region'] == region, 'Diff'
    ]
    pair_summary_records.append({
        'Region': region,
        'Mean_Diff': region_diff.mean(),
        'Median': region_diff.median(),
        'Std_Dev': region_diff.std(),
        'Num_Samples': len(region_diff),
    })

# Regions without samples still get a row (NaN statistics, 0 samples).
diffs_regions_pair_boards_summary_df = pd.DataFrame(
    pair_summary_records, columns=diffs_regions_pair_boards_summary_cols
)

In [None]:
# Persist the per-region summary of board-to-board differences as CSV.
diffs_regions_pair_boards_summary_df.to_csv(
    path_or_buf='./csv/diffs_regions_pair_boards_summary.csv',
    index=False,
)