Apply machine results to data from another month. Visually evaluate the results
--

In [2]:
import pandas as pd
import numpy as np
import os

**read train/test data into dataframe, set up numeric target**

In [3]:
df = pd.read_csv('image_data.csv', index_col=0)
df['target']='target'
df.target[df['type']=='clear'] = 0
df.target[df['type']=='steam'] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [4]:
df.head()

Unnamed: 0,wid,std,type,target
20171201001002,33,9.810708,clear,0
20171201002002,19,5.766281,clear,0
20171201003002,34,10.099505,clear,0
20171201004002,67,19.627786,clear,0
20171201013002,47,13.601454,clear,0


**set up learning data, use only wid as a data feature

In [5]:
feature_cols = ['wid']
X = df[list(feature_cols)].values

y = df['target'].values
y=y.astype('int') #y type is object, need integer

**logistic regression model**

In [6]:
from sklearn.linear_model import LogisticRegression
#instatiate
logreg = LogisticRegression()

#fit
logreg.fit(X,y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

**now we have a model, prepare new data for testing**

In [7]:
dfnew = pd.read_csv('new_image_data.csv', index_col=0)

In [8]:
feature_cols = ['wid']
X_new = dfnew[list(feature_cols)].values

In [9]:
#predict for new data, 0=clear, 1=steam
y = logreg.predict(X_new)
print (y)

[0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 0 1 1
 1 1 1 1 0 1 1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1
 1 1 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1
 1 1 0 1 0 0 0 1 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0]


**evaluate classification**

In [10]:
# print the first 10 predicted responses
logreg.predict(X_new)[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [11]:
# print the first 10 predicted probabilities of class membership
logreg.predict_proba(X_new)[0:10, :]

array([[ 0.95493044,  0.04506956],
       [ 0.98608955,  0.01391045],
       [ 0.99234677,  0.00765323],
       [ 0.90450505,  0.09549495],
       [ 0.93406478,  0.06593522],
       [ 0.86362804,  0.13637196],
       [ 0.90450505,  0.09549495],
       [ 0.97484616,  0.02515384],
       [ 0.9906563 ,  0.0093437 ],
       [ 0.97933735,  0.02066265]])

**put results back into dataframe, so can later sort out into categories**

In [12]:
dfnew.head()

Unnamed: 0,wid
20180101000002,42
20180101001002,48
20180101002002,51
20180101003002,38
20180101004002,40


In [13]:
dfnew['category'] = y

In [14]:
dfnew.head()

Unnamed: 0,wid,category
20180101000002,42,0
20180101001002,48,0
20180101002002,51,0
20180101003002,38,0
20180101004002,40,0


In [17]:
basedir = '/home/sherburn/geonet/volcam_images/crop'
cleardir = os.path.join(basedir, 'clear')
steamdir = os.path.join(basedir, 'steam')

for index, row in dfnew.iterrows():
    file = str(index)+'WIWR.jpg'
    oldfile = os.path.join(basedir,file)
    #print (index, row['category'], file)
    if (row['category'] == 0): #clear
        newfile = os.path.join(cleardir, file)
    if (row['category'] == 1): #steam
        newfile = os.path.join(steamdir, file)
    cmd = 'cp '+oldfile+' '+newfile
    os.system(cmd)