In [150]:
import numpy as np
import pandas as pd
from PIL import Image
from scipy.stats import skew
from sklearn.ensemble import RandomForestClassifier
from save_load import save_load
# Rebind the imported name `save_load` to the object returned by calling it;
# a later cell calls `save_load.save_object(...)` on this object.
save_load = save_load()

In [3]:
# Load one example radargram and inspect its dimensions.
# The context manager closes the underlying file handle as soon as the pixel
# data has been copied into a NumPy array.
with Image.open('Radar_Images/tiff/s_0025xx/s_00258001_tiff.tif') as radar:
    radar_mat = np.array(radar)
radar_mat.shape

(3600, 1891)

In [27]:
# Read the radargram metadata table and discard its "Unnamed: 0" column.
# The resulting frame is the GLOBAL VARIABLE df that the helper functions
# defined further down read from.
df = pd.read_csv("radar.csv").drop(columns="Unnamed: 0")
df

Unnamed: 0,long_start,long_end,lat_start,lat_end,center_long,center_lat,area,depth,lbl,tif,tab,long_dir,lat_dir,width
0,230.676760,229.390720,64.239113,59.864001,230.033740,62.051557,5.626569,0.079063,Radar_Images/tiff/s_0016xx/s_00168901_tiff.lbl,Radar_Images/tiff/s_0016xx/s_00168901_tiff.tif,Radar_Images/geom/s_0016xx/s_00168901_geom.tab,decreasing,decreasing,582
1,224.531750,222.273160,33.739494,16.946401,223.402455,25.342948,37.928712,-32768.000000,Radar_Images/tiff/s_0016xx/s_00168902_tiff.lbl,Radar_Images/tiff/s_0016xx/s_00168902_tiff.tif,Radar_Images/geom/s_0016xx/s_00168902_geom.tab,decreasing,decreasing,2177
2,204.730800,183.156570,-74.827200,-84.810770,193.943685,-79.818985,215.387835,0.026411,Radar_Images/tiff/s_0016xx/s_00169001_tiff.lbl,Radar_Images/tiff/s_0016xx/s_00169001_tiff.tif,Radar_Images/geom/s_0016xx/s_00169001_geom.tab,decreasing,decreasing,1364
3,175.837720,171.496620,-76.866730,-80.226770,173.667170,-78.546750,14.586270,0.063487,Radar_Images/tiff/s_0016xx/s_00169101_tiff.lbl,Radar_Images/tiff/s_0016xx/s_00169101_tiff.tif,Radar_Images/geom/s_0016xx/s_00169101_geom.tab,decreasing,decreasing,461
4,327.585390,278.401460,83.964035,87.306125,302.993425,85.635080,164.377121,-32768.000000,Radar_Images/tiff/s_0016xx/s_00169102_tiff.lbl,Radar_Images/tiff/s_0016xx/s_00169102_tiff.tif,Radar_Images/geom/s_0016xx/s_00169102_geom.tab,decreasing,increasing,607
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24033,159.782810,150.944200,80.544693,71.817875,155.363505,76.181284,77.132941,0.171973,Radar_Images/tiff/s_0623xx/s_06238902_tiff.lbl,Radar_Images/tiff/s_0623xx/s_06238902_tiff.tif,Radar_Images/geom/s_0623xx/s_06238902_geom.tab,decreasing,decreasing,1147
24034,71.376434,59.156655,74.838081,29.702621,65.266544,52.270351,551.545346,0.704263,Radar_Images/tiff/s_0623xx/s_06239201_tiff.lbl,Radar_Images/tiff/s_0623xx/s_06239201_tiff.tif,Radar_Images/geom/s_0623xx/s_06239201_geom.tab,decreasing,decreasing,5831
24035,329.186250,326.477110,-32.683760,-49.650850,327.831680,-41.167305,45.966222,-32768.000000,Radar_Images/tiff/s_0623xx/s_06239601_tiff.lbl,Radar_Images/tiff/s_0623xx/s_06239601_tiff.tif,Radar_Images/geom/s_0623xx/s_06239601_geom.tab,decreasing,decreasing,2188
24036,278.998440,277.147240,1.943925,-12.887730,278.072840,-5.471903,27.456360,-32768.000000,Radar_Images/tiff/s_0623xx/s_06239801_tiff.lbl,Radar_Images/tiff/s_0623xx/s_06239801_tiff.tif,Radar_Images/geom/s_0623xx/s_06239801_geom.tab,decreasing,decreasing,1914


In [52]:
# Look up the depth recorded for one radargram.  `.item()` pulls out the single
# matching value (and raises ValueError unless exactly one row matches) instead
# of calling float() directly on a Series, which pandas has deprecated.
float(df.loc[df["tif"] == "Radar_Images/tiff/s_0016xx/s_00168901_tiff.tif", "depth"].item())

0.079063028

In [147]:
def relevantColumns(ncol, nwant):
    """Return the indices of the `nwant` center-most columns out of `ncol`.

    The surplus columns (``ncol - nwant``) are trimmed evenly from both
    sides.  When the surplus is odd, the one extra column is trimmed from the
    low-index side.

    Parameters
    ----------
    ncol : int
        Total number of columns available.
    nwant : int
        Number of contiguous center columns to keep; must satisfy
        ``0 <= nwant <= ncol``.

    Returns
    -------
    list of int
        ``nwant`` consecutive column indices.

    Raises
    ------
    ValueError
        If ``nwant`` is negative or larger than ``ncol``.  Previously such a
        request silently produced negative / out-of-range indices.
    """
    ncol, nwant = int(ncol), int(nwant)
    if nwant < 0 or nwant > ncol:
        raise ValueError(
            "nwant must be between 0 and ncol (got ncol=%d, nwant=%d)" % (ncol, nwant)
        )

    surplus = ncol - nwant
    # Integer arithmetic equivalent of floor/ceil of surplus / 2.
    trim_high = surplus // 2          # columns dropped from the high-index side
    trim_low = surplus - trim_high    # columns dropped from the low-index side
    return list(range(trim_low, ncol - trim_high))


def extractFeatures(tif_path):
    """Build the label-plus-features row for one radargram.

    Reads the image at `tif_path`, keeps its 3000 center columns (see
    `relevantColumns`) and summarizes the pixel intensities.  The class label
    is looked up in the GLOBAL VARIABLE df by matching `tif_path` against its
    "tif" column.

    Parameters
    ----------
    tif_path : str
        Path to a .tif file; must appear exactly once in ``df["tif"]``.

    Returns
    -------
    list
        29 entries, in this order:
        [0]     "yes" / "no" class label ("no" when the depth recorded in df
                equals -32768, "yes" otherwise)
        [1]     mean pixel intensity
        [2]     standard deviation of pixel intensity
        [3]     skewness of pixel intensity
        [4:29]  counts of a 25-bin intensity histogram

    Raises
    ------
    ValueError
        If `tif_path` does not match exactly one row of df.
    """
    print(tif_path)

    # Close the file handle as soon as the pixels are copied into an array.
    with Image.open(tif_path) as radar:
        radar_mat = np.array(radar)
    radar_mat = radar_mat[:, relevantColumns(radar_mat.shape[1], 3000)]
    features = []

    # FEATURE 1 FOR CLASSIFICATION
    # CLASSIFICATION: ice or not
    # `.item()` extracts the single matching depth; calling float() directly
    # on a one-element Series is deprecated in pandas.
    depth = df.loc[df["tif"] == tif_path, "depth"].item()
    no_depth_sentinel = -32768  # depth value that the labelling treats as "no"
    features.append("no" if depth == no_depth_sentinel else "yes")

    # FEATURE 2
    # mean intensity of radargram
    features.append(np.mean(radar_mat))

    # FEATURE 3
    # standard deviation of intensity of radargram
    features.append(np.std(radar_mat))

    # FEATURE 4
    # skewness of intensity of radargram
    features.append(skew(radar_mat.ravel()))

    # FEATURE 5
    # color histogram
    # NOTE(review): no `range` is passed, so the 25 bin edges are derived from
    # this image's own min/max and bins may not line up between images --
    # consider a fixed range once the pixel value range is confirmed.
    color_hist, _bin_edges = np.histogram(radar_mat, bins=25)
    features.extend(color_hist)

    return features


def sampleTIF(n, replace=False, random_state=None):
    """Draw a random sample of radargram rows from the GLOBAL VARIABLE df.

    Only rows whose "width" is greater than 3000 are eligible.

    Rows are drawn WITHOUT replacement by default.  The previous
    implementation drew with replacement, so the same radargram could appear
    several times in one sample -- and therefore on both sides of a later
    train/test split of that sample.

    Parameters
    ----------
    n : int
        Number of rows to draw.
    replace : bool, default False
        Pass True to restore the old sampling-with-replacement behavior
        (required when `n` exceeds the number of eligible rows).
    random_state : int, numpy Generator/RandomState or None, default None
        Forwarded to `DataFrame.sample` so a draw can be made reproducible.

    Returns
    -------
    pandas.DataFrame
        The `n` sampled rows, keeping their original index labels.

    Raises
    ------
    ValueError
        If `replace` is False and `n` is larger than the number of eligible
        rows (raised by `DataFrame.sample`).
    """
    # CLASSIFICATION
    # Include radargrams with width > 3000
    eligible = df[df["width"] > 3000]
    return eligible.sample(n=n, replace=replace, random_state=random_state)

In [84]:
# Draw n radargrams and compute one label-plus-features list per image.
n = 500
sample = sampleTIF(n)
# Positional indices of the first and second half of the sample.  The upper
# bound of the second half is n (exclusive) so the last row is included;
# it was previously n-1, which dropped the final index.
half1 = list(range(0, n // 2))
half2 = list(range(n // 2, n))
features = sample["tif"].apply(extractFeatures)

Radar_Images/tiff/s_0580xx/s_05805901_tiff.tif
Radar_Images/tiff/s_0251xx/s_02515001_tiff.tif
Radar_Images/tiff/s_0070xx/s_00700901_tiff.tif
Radar_Images/tiff/s_0390xx/s_03907102_tiff.tif
Radar_Images/tiff/s_0372xx/s_03724002_tiff.tif
Radar_Images/tiff/s_0444xx/s_04448202_tiff.tif
Radar_Images/tiff/s_0363xx/s_03636301_tiff.tif
Radar_Images/tiff/s_0051xx/s_00515601_tiff.tif
Radar_Images/tiff/s_0382xx/s_03821701_tiff.tif
Radar_Images/tiff/s_0250xx/s_02505601_tiff.tif
Radar_Images/tiff/s_0058xx/s_00581302_tiff.tif
Radar_Images/tiff/s_0606xx/s_06067101_tiff.tif
Radar_Images/tiff/s_0338xx/s_03383701_tiff.tif
Radar_Images/tiff/s_0498xx/s_04984902_tiff.tif
Radar_Images/tiff/s_0060xx/s_00600502_tiff.tif
Radar_Images/tiff/s_0356xx/s_03565301_tiff.tif
Radar_Images/tiff/s_0232xx/s_02329901_tiff.tif
Radar_Images/tiff/s_0620xx/s_06201601_tiff.tif
Radar_Images/tiff/s_0265xx/s_02655202_tiff.tif
Radar_Images/tiff/s_0269xx/s_02698402_tiff.tif
Radar_Images/tiff/s_0361xx/s_03616903_tiff.tif
Radar_Images/

Radar_Images/tiff/s_0332xx/s_03326301_tiff.tif
Radar_Images/tiff/s_0229xx/s_02291201_tiff.tif
Radar_Images/tiff/s_0392xx/s_03922201_tiff.tif
Radar_Images/tiff/s_0244xx/s_02443501_tiff.tif
Radar_Images/tiff/s_0571xx/s_05715802_tiff.tif
Radar_Images/tiff/s_0371xx/s_03719301_tiff.tif
Radar_Images/tiff/s_0371xx/s_03715501_tiff.tif
Radar_Images/tiff/s_0166xx/s_01665801_tiff.tif
Radar_Images/tiff/s_0490xx/s_04900901_tiff.tif
Radar_Images/tiff/s_0460xx/s_04602501_tiff.tif
Radar_Images/tiff/s_0376xx/s_03761601_tiff.tif
Radar_Images/tiff/s_0162xx/s_01625801_tiff.tif
Radar_Images/tiff/s_0052xx/s_00521602_tiff.tif
Radar_Images/tiff/s_0550xx/s_05505401_tiff.tif
Radar_Images/tiff/s_0331xx/s_03312601_tiff.tif
Radar_Images/tiff/s_0329xx/s_03296301_tiff.tif
Radar_Images/tiff/s_0555xx/s_05553001_tiff.tif
Radar_Images/tiff/s_0240xx/s_02404301_tiff.tif
Radar_Images/tiff/s_0058xx/s_00586701_tiff.tif
Radar_Images/tiff/s_0133xx/s_01332001_tiff.tif
Radar_Images/tiff/s_0509xx/s_05093701_tiff.tif
Radar_Images/

Radar_Images/tiff/s_0340xx/s_03409001_tiff.tif
Radar_Images/tiff/s_0067xx/s_00672302_tiff.tif
Radar_Images/tiff/s_0580xx/s_05801601_tiff.tif
Radar_Images/tiff/s_0265xx/s_02656401_tiff.tif
Radar_Images/tiff/s_0403xx/s_04032001_tiff.tif
Radar_Images/tiff/s_0387xx/s_03873001_tiff.tif
Radar_Images/tiff/s_0257xx/s_02570401_tiff.tif
Radar_Images/tiff/s_0269xx/s_02694601_tiff.tif
Radar_Images/tiff/s_0351xx/s_03512101_tiff.tif
Radar_Images/tiff/s_0251xx/s_02517801_tiff.tif
Radar_Images/tiff/s_0080xx/s_00800701_tiff.tif
Radar_Images/tiff/s_0065xx/s_00657803_tiff.tif
Radar_Images/tiff/s_0135xx/s_01357101_tiff.tif
Radar_Images/tiff/s_0471xx/s_04710601_tiff.tif
Radar_Images/tiff/s_0581xx/s_05818201_tiff.tif
Radar_Images/tiff/s_0395xx/s_03953001_tiff.tif
Radar_Images/tiff/s_0580xx/s_05807903_tiff.tif
Radar_Images/tiff/s_0343xx/s_03433301_tiff.tif
Radar_Images/tiff/s_0468xx/s_04680403_tiff.tif
Radar_Images/tiff/s_0266xx/s_02663501_tiff.tif
Radar_Images/tiff/s_0470xx/s_04702401_tiff.tif
Radar_Images/

In [139]:
# Column labels: the class label, three summary statistics, then the 25
# histogram bins numbered from 1.
feature_names = ["ice", "mean", "sd", "skew"] + [
    "color_hist" + str(bin_no) for bin_no in range(1, 26)
]
# One row per sampled radargram; each element of `features` is one row's list.
features_df = pd.DataFrame(features.tolist(), columns=feature_names)
big = features_df
big

Unnamed: 0,ice,mean,sd,skew,color_hist1,color_hist2,color_hist3,color_hist4,color_hist5,color_hist6,...,color_hist16,color_hist17,color_hist18,color_hist19,color_hist20,color_hist21,color_hist22,color_hist23,color_hist24,color_hist25
0,no,23.836873,18.870250,2.404659,2587938,2300939,2548457,1904729,913965,298731,...,5340,3939,3083,2618,2000,1656,1086,804,560,573
1,yes,38.155981,19.190258,1.509170,627989,1059020,1924368,2546819,2337505,1490655,...,6235,4277,3228,2566,1877,1491,952,609,375,471
2,no,31.953989,18.892281,1.935628,1191712,1575539,2378806,2562605,1843349,871934,...,6495,4968,4018,3168,2469,1978,1222,745,458,481
3,no,29.292354,17.665229,1.421385,1376600,1705457,2458428,2510853,1702026,717604,...,3825,3172,2527,1899,1428,990,648,301,116,35
4,yes,49.664304,20.056276,1.294642,179405,409045,976753,1789173,2422355,2514192,...,7265,5354,4370,3421,2685,2439,1771,1376,1024,717
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,yes,51.082800,20.750607,1.492565,149795,363319,902963,1706478,2374574,2543818,...,8896,6364,5208,4180,3600,3228,2226,1825,1248,1632
496,yes,43.310140,18.350823,1.019845,345592,672869,1413556,2245615,2581391,2165554,...,4905,3401,2687,1953,1350,1056,576,393,279,255
497,no,27.476612,19.977855,1.965260,1986426,2041976,2525787,2160469,1211979,472511,...,6966,4205,2853,1868,1174,823,463,286,196,244
498,yes,21.610645,19.276352,3.679376,3036790,2473232,2522338,1701149,709087,190371,...,5621,4561,4249,3900,3600,3567,2822,2515,1941,4348


In [151]:
# Hand the feature table to the project's save_load helper under the name "big"
# (presumably persists it to disk -- confirm against the save_load module).
save_load.save_object(big, "big")

In [140]:
# Positional split of the feature table: rows 0-249 train, rows 250-499 test.
# Column 0 holds the class label; columns 1-29 hold the numeric features.
train_rows, test_rows = slice(0, 250), slice(250, 500)
feature_cols, label_col = slice(1, 30), 0

x, y = big.iloc[train_rows, feature_cols], big.iloc[train_rows, label_col]

x_test, y_test = big.iloc[test_rows, feature_cols], big.iloc[test_rows, label_col]

In [144]:
# Train a default-configured random forest on the training half, then report
# its mean accuracy on the held-out half (`fit` returns the estimator itself).
rf = RandomForestClassifier().fit(x, y)
rf.score(x_test, y_test)

0.8