# Working Notebook for creating wndchrm features

## Step 1
### Load necessary modules

In [1]:
# Add python modules

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from copy import deepcopy

# Make the parent directory and Machine_Score importable for the custom modules
from sys import path as sysPath
sysPath.extend( [ '../', '../Machine_Score/' ] )

# Pull in the main SIMR program and alias the submodules it has already loaded
import main_SIMR as simr
gm, im, ic, fe = simr.gm, simr.im, simr.ic, simr.fe
ms, sa = simr.ms, simr.sa
dc = simr.ms.dc

# Call every module's test() in the same order as before; per the recorded
# output each call prints a one-line greeting identifying the module.
for _loaded in ( simr, gm, im, ic, fe, ms, dc, sa ):
    _loaded.test()

SIMR: Hi!  You're in Matthew's main program for all things galaxy collisions
GM: Hi!  You're in Matthew's module for generally useful functions and classes
IM: Hi!  You're in Matthew's information module for SPAM
IC: Hi!  You're in Matthew's main code for all things image creation.
FE: Hi!  You're in Matthew's module for extracting feature values from images.
MS: Hi!  You're in Matthew's SIMR module for all things machine scoring images
DC: Hi!  You're in direct_image_compare.py
SA: Hi!  You're in Matthew's Main program for score analysis!


___
## Step 2
### Load Target Info class
Needed for loading the target image and running through models later.

In [2]:
# Resolve the target directory through gm.validPath before handing it on
tDir = gm.validPath( '../targetDir' )

# Build the target info object and report whether it initialised
tInfo = im.target_info_class( targetDir = tDir, printAll=False)
print(
    "WARNING: target info class bad"
    if tInfo.status == False
    else "Target Good!: %s" % tInfo.get('target_id')
)

# Fetch the info object for a single run of this target and report on it too
rInfo = tInfo.getRunInfo( rID='run_0000' )
print(
    "WARNING"
    if rInfo.status == False
    else "Run Good!: %s"%rInfo.get('run_id')
)

Target Good!: 587722984435351614
Run Good!: run_0000


___
## Step 3: Create a new image parameter


In [3]:
# Read the chime_group_1 parameter file. The result is indexed like a dict of
# parameter entries further down (it is passed as groupParam to the scaler
# function, which reads entry['imgArg']['name'] from each one).
chimeLoc = './../param/chime_group_1.json'
chime_group_1 = gm.readJson( chimeLoc )
#gm.pprint(chime_group_1['chime_1'])

In [4]:
# Argument object passed to the fe.* call below, with printAll switched on so
# the call reports its progress.
testArg = gm.inArgClass()
testArg.printAll = True

In [5]:
# Collect the raw wndchrm values of every run of this target. Per the recorded
# output this reads the per-run wndchrm files and reports one combined DataFrame.
# NOTE(review): presumably this also writes the files that are read back from
# tInfo.wndRunRawLoc / tInfo.wndTargetRawLoc in the next cell - confirm in fe.
fe.target_collect_wndchrm_all_raw( testArg, tInfo = tInfo )

FE: target_collect_wndchrm_all_raw
FE: target_collect_wndchrm_all_raw: Loop: 1293 / 1293: Complete!
	 - Read 1290 of 1293 wndchrm run files.
	 - Final WNDCHRM DataFrame
	 - Shape: (61921, 1062)
	 - Unique Runs: 1290
	 - Unique Image Names: 49
	 - Header Length: 1062
	 - Headers head: [ run_id, image_name, zoo_merger_score, ... ]
	 - Headers tail: [ ... , Zernike Coefficients (Fourier ()) [70], Zernike Coefficients (Fourier ()) [71] ]


In [10]:
# Load the raw wndchrm tables from the locations stored on tInfo: the run
# table is read as a pickle, the target table as a CSV.
# NOTE(review): target_wndchrm_create_norm_scaler reads runsRaw and targetRaw
# as notebook-level variables, so this cell has to run before that function is
# called. Its execution count (In [10]) is out of sequence with its position.
# NOTE(review): unpickling can execute arbitrary code - only read trusted files.
runsRaw = pd.read_pickle( tInfo.wndRunRawLoc )
targetRaw = pd.read_csv( tInfo.wndTargetRawLoc )
print("read")

read


In [7]:

# Function to collect model wndchrm values and normalize them.
def target_wndchrm_create_norm_scaler( args, tInfo, normDict, groupParam = None, runsDF = None, targetDF = None ):
    """
    Fit a feature scaler on a subset of the raw wndchrm rows, then apply it to
    every raw row and hand both the scaler and the scaled table to tInfo.

    Parameters
    ----------
    args : object
        Must expose boolean attributes `printAll` and `printBase`; they only
        control how much progress text is printed.
    tInfo : object
        Target info object.  Used for `get('target_id')` (logging only),
        `saveWndchrmScaler( scaler, name )` and `saveWndchrmDF( df, name )`.
    normDict : dict
        Normalization description.  Keys read here:
          - 'top_models' (optional): number of run_ids, taken in order of first
            appearance in the run table, whose rows are used for fitting.
          - 'normalization_method' (optional, default 'sklearn_StandardScaler').
          - 'name' (optional, default 'default'): name passed to both saves.
    groupParam : dict, optional
        Mapping whose values each contain ['imgArg']['name'].  When given, only
        rows whose 'image_name' is one of those names are used for fitting.
    runsDF, targetDF : pandas.DataFrame, optional
        Raw run and target tables.  When omitted, the notebook-level variables
        `runsRaw` and `targetRaw` are used, as in the original implementation.

    Returns
    -------
    None.  Results are passed to tInfo.  If the normalization method is not
    recognised a warning is printed and nothing is saved.
    """

    printAll = args.printAll
    printBase = args.printBase

    if printBase:
        print( "FE: normalize_target_wnchrm." )
        gm.tabprint( "tID: %s" % tInfo.get( 'target_id' ) )
        gm.tabprint( "Normalization Parameters")
        gm.pprint( normDict )

    # Columns that identify a row; everything else is treated as a feature value
    infoHeaders = [ 'run_id', 'target_id', 'image_name', 'zoo_merger_score' ]

    # Use the same name for both saved artifacts so they always pair up
    normName = normDict.get( 'name', 'default' )

    # TODO: collect/read the raw tables from tInfo here instead of relying on
    # notebook-level variables.
    if runsDF is None:
        runsDF = runsRaw
    if targetDF is None:
        targetDF = targetRaw

    allRawDF = pd.concat( [ runsDF, targetDF ] )
    allInfoDF = allRawDF[ infoHeaders ]

    if printAll:
        gm.tabprint( 'Target Shape: %s' % str( targetDF.shape ) )
        gm.tabprint( 'Runs Shape: %s' % str( runsDF.shape ) )
        gm.tabprint( 'All Raw Shape: %s' % str( allRawDF.shape ) )

    # Combine top N models and target
    if normDict.get( 'top_models', None ) is not None:

        topN = int( normDict['top_models'] )

        if printAll:
            gm.tabprint( 'top_models: %d' % topN )

        # Grab names of top N models
        # assume run_id is listed in alphanumerical order of best.  This will likely change later
        runIDList = list( runsDF['run_id'].unique() )[0:topN]
        topRunRaw = runsDF[ runsDF['run_id'].isin( runIDList ) ]
        trainDF = pd.concat( [ topRunRaw, targetDF ] )

        if printAll: gm.tabprint("Shape top N: %s" % str( trainDF.shape ) )

    # Combine all models and target.
    else:
        trainDF = allRawDF

    # Extract only images from image_group
    if groupParam is not None:
        imgNameList = [ groupParam[pKey]['imgArg']['name'] for pKey in groupParam ]
        trainDF = trainDF[ trainDF['image_name'].isin(imgNameList) ]

        if printAll:    gm.tabprint("Shape image group: %s" % str( trainDF.shape ) )

    if printAll:    gm.tabprint('All Raw Shape: %s'%str(trainDF.shape))

    # Headers not in info headers are assumed feature value header names
    featHeaders = [ h for h in trainDF if h not in infoHeaders ]

    # Remove any rows with nan or infinite values in feature headers.
    # axis must be passed by keyword: pandas 2.x rejects the positional form.
    badRows = trainDF[ featHeaders ].isin( [ np.nan, np.inf, -np.inf ] ).any( axis=1 )
    trainDF = trainDF[ ~badRows ]
    if printAll:    gm.tabprint('filtered out: %s' % str(trainDF.shape))

    # Seperate information columns from feature value columns being normalized.
    trainRaw = trainDF.drop( infoHeaders, axis=1 ).values

    if printAll:
        gm.tabprint('info Headers: [ %s ]' % ', '.join(infoHeaders))
        gm.tabprint('feat value Shape: %s' % str(trainRaw.shape ) )

    # Determine what method to use for normalizing data and create a scaler model
    normMethod = normDict.get( 'normalization_method', 'sklearn_StandardScaler' )

    if printAll:    gm.tabprint("Creating Scaler: %s" % normMethod )

    if normMethod == 'sklearn_StandardScaler':
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        # Only the fitted state is needed; the training rows are transformed
        # together with all other rows further down.
        scaler.fit( trainRaw )

    else:
        print("WARNING: FE: normalize_target_wndchrm")
        gm.tabprint("Normalization Method Not Found: %s" % normMethod )
        return

    if printAll:    gm.tabprint("Scaler Complete. Saving...")

    # Have target info save scaler file
    tInfo.saveWndchrmScaler( scaler, normName )

    # Apply new scaler to all the feature data
    if printAll: gm.tabprint("Applying scaler to all data")

    # NOTE(review): unlike the training subset, these rows are not filtered for
    # nan/inf values before being transformed - confirm the scaler tolerates them.
    featRawValues = allRawDF.drop( infoHeaders, axis=1 ).values

    if printAll:
        gm.tabprint("Raw Feat Values Shape: %s" % str( featRawValues.shape ) )
        gm.tabprint("Transforming Raw Feat Values..." )

    featScaledValues = scaler.transform( featRawValues )

    if printAll:
        gm.tabprint("Transform Complete" )

    scaledDF = pd.DataFrame( featScaledValues, columns = featHeaders )

    if printAll:
        gm.tabprint("Scaled Feat Values Shape: %s" % str( scaledDF.shape ))
        gm.tabprint("Scaled Info Values Shape: %s" % str( allInfoDF.shape ))

    # Put the info columns back in front of the scaled feature columns.
    # .values is used so rows are matched by position, not by index label.
    for i,h in enumerate( infoHeaders ):
        scaledDF.insert(i, h, allInfoDF[h].values )

    if printAll:
        gm.tabprint("Scaled DF Shape: %s" % str( scaledDF.shape ))
        gm.tabprint("Saving scaled DF...")

    tInfo.saveWndchrmDF( scaledDF, normName )
               
# Describe the normalization to run, save that description as JSON, then build
# the scaler and the scaled wndchrm table for the target.

norm_wndchrm_all_test = {
    'name': 'norm_wndchrm_test',
    'top_models': 500,
    'image_group': 'chime_group_1',
    'normalization_method': 'sklearn_StandardScaler',
}

# The parameter file is named after the normalization it describes
normLoc = '../param/' + norm_wndchrm_all_test['name'] + '.json'
gm.saveJson( norm_wndchrm_all_test, normLoc )

# Argument object with full progress printing enabled
tArg = gm.inArgClass()
tArg.printAll = True

# The image group itself is handed over through groupParam
target_wndchrm_create_norm_scaler(
    tArg,
    tInfo,
    norm_wndchrm_all_test,
    groupParam = chime_group_1,
)
print("Done")

FE: normalize_target_wnchrm.
	 - tID: 587722984435351614
	 - Normalization Parameters
{'image_group': 'chime_group_1',
 'name': 'norm_wndchrm_test',
 'normalization_method': 'sklearn_StandardScaler',
 'top_models': 500}
	 - Target Shape: (1, 1062)
	 - Runs Shape: (61921, 1062)
	 - All Raw Shape: (61922, 1063)
	 - top_models: 500
	 - Shape top N: (24002, 1063)
	 - Shape image group: (24001, 1063)
	 - All Raw Shape: (24001, 1063)
	 - filtered out: (24000, 1063)
	 - info Headers: [ run_id, target_id, image_name, zoo_merger_score ]
	 - feat value Shape: (24000, 1059)
	 - Creating Scaler: sklearn_StandardScaler
	 - Scaler Complete. Saving...
	 - Applying scaler to all data
	 - Raw Feat Values Shape: (61922, 1059)
	 - Transforming Raw Feat Values...
	 - Transform Complete
	 - Scaled Feat Values Shape: (61922, 1059)
	 - Scaled Info Values Shape: (61922, 4)
	 - Scaled DF Shape: (61922, 1063)
	 - Saving scaled DF...
Done


In [8]:
# Run fe.wndchrm_target_all on the target with a freshly created argument object.
# NOTE(review): what this call produces is not visible from this notebook; the
# recorded output only reports "Collecting wndchrm all data" - confirm in fe.
fe.wndchrm_target_all( gm.inArgClass(), tInfo )

	 - Collecting wndchrm all data


In [9]:
# Switch on printing on the shared tInfo object (this changes tInfo for every
# later cell as well), then have it gather the run infos. Per the recorded
# output this loops over all runs and then saves the target info file.
tInfo.printAll = True
tInfo.gatherRunInfos()
print("Done")

IM: Target.gatherRunInfos
	 - IM: gather_run_info LOOP: 1293 / 1293 COMPLETE
IM: Target.saveInfoFile():
	 - Saving target info file...
Done
