In [1]:
## Set the width of cells to max ##
# display and HTML are imported from the public IPython.display module;
# importing them through the internal IPython.core.display path is deprecated.
from IPython.display import display, HTML
# Inject a CSS rule that makes the notebook's .container element full width
display(HTML("<style>.container { width:100% !important; }</style>"))

In [1]:
## Import libraries ##

import keras
from keras.layers import Dense, Dropout, Activation
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
import tensorflow as tf
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from IPython.display import FileLink, FileLinks
import matplotlib.pyplot as plt
import pandas as pd
import sys
import pylab
from scipy.stats.stats import pearsonr

Using TensorFlow backend.


In [4]:
## Import the data ##

# Candidate input files, one entry per line
datafilenames = [
    "Shared100NonShared/NormalizedCharge/output_final_1.h5",
    "Shared100NonShared/AbsoluteCharge/output_final_1.h5",
    "SharedHits/NormalizedCharge/output_final_1.h5",
    "SharedHits/AbsoluteCharge/output_final_1.h5",
    "AllHits/NormalizedCharge/output_final_1.h5",
    "AllHits/AbsoluteCharge/output_final_1.h5",
]
# File used below: entry 4 of the list (AllHits/NormalizedCharge)
datafilename = datafilenames[4]

# Names of the 400 pixel columns (pixel_0 ... pixel_399) of one 20x20 image
pixels = ["pixel_{0}".format(i) for i in range(400)] # 20*20 = 400

def to_image(df):
    """Return the pixel columns of ``df`` as an array of shape (rows, 20, 20, 1).

    The columns are selected by name in the order given by ``pixels``, so the
    column order of ``df`` itself does not matter. Each row of 400 values is
    laid out row-major: pixel_k lands at position [k // 20, k % 20, 0].
    """
    flat_pixels = np.asarray(df[pixels])
    # One reshape yields the same (rows, 20, 20, 1) layout with the trailing
    # length-1 axis included.
    return flat_pixels.reshape(-1, 20, 20, 1)

# Import data: read rows 0..199999 stored under key "df" of the selected file
df = pd.read_hdf("/uscms_data/d3/bbonham/TrackingDstar/LambaAnalyzer/output_of_postprocess/"+datafilename, key="df", mode='r', start=0, stop=200000)

# Print dataframe info BEFORE the final cuts, preceded by the section title.
# The parenthesised single-argument print behaves the same on Python 2 and 3.
print('\n\033[1m' + "No Cut on Sim Tracks" + '\033[0m') # name printed in bold
# DataFrame.info() writes the summary to stdout itself and returns None, so it
# is called directly; wrapping it in print only appended a stray "None" line.
df.info()

## Make final cuts ##

# Data set before making final cuts.
# NOTE: df_old is a second name for the same object as df, not a copy, so the
# in-place df.insert(...) below also adds "Pixel_number" to df_old. The two
# halves are created before that insert and therefore do not get the column.
df_old = df
# Random 50/50 split; the fixed random_state makes the split reproducible
df_old_train=df.sample(frac=0.5, random_state=0)
df_old_test=df.drop(df_old_train.index)
images_old_train = to_image(df_old_train)
images_old_test = to_image(df_old_test)

# Compute pixel_number variable: per row, the count of pixel values that are
# True when cast to bool (i.e. non-zero)
pixelColumns = ["pixel_%i" % x for x in range(400)]
pixels_df = df[pixelColumns].values
pixel_number = pixels_df.astype(bool).sum(axis=1)
# Inserted as the first column, in place
df.insert(0, "Pixel_number", pixel_number)

##### Make final cuts: GenDeltaR<0.1, pixel number > 1, and no cut on sim tracks #####
df = df[
    (df["GenDeltaR"] < 0.1)
    & (df["Pixel_number"] > 1)
]

# Print dataframe info after final cuts.
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly instead of through print (which added a stray "None" line).
df.info()

###########################################

## Import the data ##

# Candidate input files, one entry per line
datafilenames = [
    "Shared100NonShared/NormalizedCharge/output_final_1.h5",
    "Shared100NonShared/AbsoluteCharge/output_final_1.h5",
    "SharedHits/NormalizedCharge/output_final_1.h5",
    "SharedHits/AbsoluteCharge/output_final_1.h5",
    "AllHits/NormalizedCharge/output_final_1.h5",
    "AllHits/AbsoluteCharge/output_final_1.h5",
]
# File used below: entry 4 of the list (AllHits/NormalizedCharge)
datafilename = datafilenames[4]

# Names of the 400 pixel columns (pixel_0 ... pixel_399) of one 20x20 image
pixels = ["pixel_{0}".format(i) for i in range(400)] # 20*20 = 400

def to_image(df):
    """Return the pixel columns of ``df`` as an array of shape (rows, 20, 20, 1).

    The columns are selected by name in the order given by ``pixels``, so the
    column order of ``df`` itself does not matter. Each row of 400 values is
    laid out row-major: pixel_k lands at position [k // 20, k % 20, 0].
    """
    flat_pixels = np.asarray(df[pixels])
    # One reshape yields the same (rows, 20, 20, 1) layout with the trailing
    # length-1 axis included.
    return flat_pixels.reshape(-1, 20, 20, 1)

# Import data: read rows 0..199999 stored under key "df" of the selected file
df = pd.read_hdf("/uscms_data/d3/bbonham/TrackingDstar/LambaAnalyzer/output_of_postprocess/"+datafilename, key="df", mode='r', start=0, stop=200000)
# Disabled alternative: read the whole file from a path relative to the notebook
#df = pd.read_hdf(datafilename, key="df", mode='r')

# Print dataframe info BEFORE the final cuts, preceded by the section title.
# The parenthesised single-argument print behaves the same on Python 2 and 3.
print('\n\033[1m' + "nUniqueSimTracks <= 0" + '\033[0m') # name printed in bold
# DataFrame.info() writes the summary to stdout itself and returns None, so it
# is called directly; wrapping it in print only appended a stray "None" line.
df.info()

## Make final cuts ##

# Data set before making final cuts.
# NOTE: df_old is a second name for the same object as df, not a copy, so the
# in-place df.insert(...) below also adds "Pixel_number" to df_old. The two
# halves are created before that insert and therefore do not get the column.
df_old = df
# Random 50/50 split; the fixed random_state makes the split reproducible
df_old_train=df.sample(frac=0.5, random_state=0)
df_old_test=df.drop(df_old_train.index)
images_old_train = to_image(df_old_train)
images_old_test = to_image(df_old_test)

# Compute pixel_number variable: per row, the count of pixel values that are
# True when cast to bool (i.e. non-zero)
pixelColumns = ["pixel_%i" % x for x in range(400)]
pixels_df = df[pixelColumns].values
pixel_number = pixels_df.astype(bool).sum(axis=1)
# Inserted as the first column, in place
df.insert(0, "Pixel_number", pixel_number)

##### Make final cuts: GenDeltaR<0.1, pixel number > 1, and cut away nonshared hits #####
# NOTE(review): the mask keeps rows with nUniqueSimTracksInSharedHit <= 0;
# confirm that this matches the "cut away nonshared hits" intent stated above.
df = df[
    (df["GenDeltaR"] < 0.1)
    & (df["Pixel_number"] > 1)
    & (df["nUniqueSimTracksInSharedHit"] <= 0)
]

# Print dataframe info after final cuts.
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly instead of through print (which added a stray "None" line).
df.info()

###########################################

## Import the data ##

# Candidate input files, one entry per line
datafilenames = [
    "Shared100NonShared/NormalizedCharge/output_final_1.h5",
    "Shared100NonShared/AbsoluteCharge/output_final_1.h5",
    "SharedHits/NormalizedCharge/output_final_1.h5",
    "SharedHits/AbsoluteCharge/output_final_1.h5",
    "AllHits/NormalizedCharge/output_final_1.h5",
    "AllHits/AbsoluteCharge/output_final_1.h5",
]
# File used below: entry 4 of the list (AllHits/NormalizedCharge)
datafilename = datafilenames[4]

# Names of the 400 pixel columns (pixel_0 ... pixel_399) of one 20x20 image
pixels = ["pixel_{0}".format(i) for i in range(400)] # 20*20 = 400

def to_image(df):
    """Return the pixel columns of ``df`` as an array of shape (rows, 20, 20, 1).

    The columns are selected by name in the order given by ``pixels``, so the
    column order of ``df`` itself does not matter. Each row of 400 values is
    laid out row-major: pixel_k lands at position [k // 20, k % 20, 0].
    """
    flat_pixels = np.asarray(df[pixels])
    # One reshape yields the same (rows, 20, 20, 1) layout with the trailing
    # length-1 axis included.
    return flat_pixels.reshape(-1, 20, 20, 1)

# Import data: read rows 0..199999 stored under key "df" of the selected file
df = pd.read_hdf("/uscms_data/d3/bbonham/TrackingDstar/LambaAnalyzer/output_of_postprocess/"+datafilename, key="df", mode='r', start=0, stop=200000)
# Disabled alternative: read the whole file from a path relative to the notebook
#df = pd.read_hdf(datafilename, key="df", mode='r')

# Print dataframe info BEFORE the final cuts, preceded by the section title.
# The parenthesised single-argument print behaves the same on Python 2 and 3.
print('\n\033[1m' + "nUniqueSimTracks != (0 or 1)" + '\033[0m') # name printed in bold
# DataFrame.info() writes the summary to stdout itself and returns None, so it
# is called directly; wrapping it in print only appended a stray "None" line.
df.info()

## Make final cuts ##

# Data set before making final cuts.
# NOTE: df_old is a second name for the same object as df, not a copy, so the
# in-place df.insert(...) below also adds "Pixel_number" to df_old. The two
# halves are created before that insert and therefore do not get the column.
df_old = df
# Random 50/50 split; the fixed random_state makes the split reproducible
df_old_train=df.sample(frac=0.5, random_state=0)
df_old_test=df.drop(df_old_train.index)
images_old_train = to_image(df_old_train)
images_old_test = to_image(df_old_test)

# Compute pixel_number variable: per row, the count of pixel values that are
# True when cast to bool (i.e. non-zero)
pixelColumns = ["pixel_%i" % x for x in range(400)]
pixels_df = df[pixelColumns].values
pixel_number = pixels_df.astype(bool).sum(axis=1)
# Inserted as the first column, in place
df.insert(0, "Pixel_number", pixel_number)

##### Make final cuts: GenDeltaR<0.1, pixel number > 1, and sim tracks not zero or one#####
df = df[
    (df["GenDeltaR"] < 0.1)
    & (df["Pixel_number"] > 1)
    & (df["nUniqueSimTracksInSharedHit"] != 0)
    & (df["nUniqueSimTracksInSharedHit"] != 1)
]

# Print dataframe info after final cuts.
# DataFrame.info() prints the summary itself and returns None, so it is called
# directly instead of through print (which added a stray "None" line).
df.info()


[1mNo Cut on Sim Tracks[0m
<class 'pandas.core.frame.DataFrame'>
Int64Index: 200000 entries, 0 to 199999
Columns: 406 entries, pixel_78 to pixel_148
dtypes: float64(405), int32(1)
memory usage: 620.3 MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 43800 entries, 4 to 199999
Columns: 407 entries, Pixel_number to pixel_148
dtypes: float64(405), int32(1), int64(1)
memory usage: 136.2 MB
None

[1mnUniqueSimTracks <= 0[0m
<class 'pandas.core.frame.DataFrame'>
Int64Index: 200000 entries, 0 to 199999
Columns: 406 entries, pixel_78 to pixel_148
dtypes: float64(405), int32(1)
memory usage: 620.3 MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 41260 entries, 4 to 199999
Columns: 407 entries, Pixel_number to pixel_148
dtypes: float64(405), int32(1), int64(1)
memory usage: 128.3 MB
None

[1mnUniqueSimTracks != (0 or 1)[0m
<class 'pandas.core.frame.DataFrame'>
Int64Index: 200000 entries, 0 to 199999
Columns: 406 entries, pixel_78 to pixel_148
dtypes: float64(405), int32(1