In [193]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score 

In [194]:
# Folder holding the input CSV files; the relative path is resolved against
# the working directory the notebook is run from.
DATA_FOLDER = 'data/'

# Untouched copies of both inputs; later cells derive cleaned frames from these.
raw_assay = pd.read_csv(DATA_FOLDER + 'BS_assay.csv')
raw_block_model = pd.read_csv(DATA_FOLDER + 'BS_block_model.csv')

In [195]:
def calculate_measures(y, y_pred):
    """Summarise how closely ``y_pred`` matches ``y``.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, compared against ``y`` by the scikit-learn metrics.

    Returns
    -------
    dict
        Three entries, in this order:
        ``'score'`` - R^2 as returned by ``r2_score``;
        ``'mse'``   - value returned by ``mean_squared_error``;
        ``'rmse'``  - square root of ``'mse'``.
    """
    r2 = r2_score(y, y_pred)
    mse = mean_squared_error(y, y_pred)
    # Key order matters: it becomes the column order of the summary tables.
    return {
        'score': r2,
        'mse': mse,
        'rmse': np.sqrt(mse),
    }



In [196]:
# Build the working `assay` and `block_model` frames from the raw inputs.
# The cell always starts from the raw frames, so re-running it is safe.

# Columns excluded from the assay analysis.
DROP_COLS = ['HOLEID', 'Comment', 'Veins', 'Snip_Veins', 'Snip_veins_sorted']
assay = raw_assay.drop(DROP_COLS, axis=1)

# Strip the "_OLDISKUT" suffix from column names. Only names that actually
# END with the suffix are renamed, and the slice length is derived from the
# suffix itself, so a name that merely contains the marker somewhere in the
# middle is left untouched instead of losing its last characters.
OLD_SUFFIX = "_OLDISKUT"
ASSAY_COLS = assay.columns.values.tolist()
assay_col_rename = {
    col_name: col_name[:-len(OLD_SUFFIX)]
    for col_name in ASSAY_COLS
    if col_name.endswith(OLD_SUFFIX)
}
assay = assay.rename(columns=assay_col_rename)

# The block model is used as-is except for the 'NSR' column.
block_model = raw_block_model.drop('NSR', axis=1)


In [197]:
# Block model: fit one single-feature linear regression per predictor column
# and record its measures, evaluated on the same rows it was fitted on.
bm_measures = {}
df = block_model

GOLD_COL = 'AU'
dfX = df.drop(columns=GOLD_COL)
y = df[GOLD_COL]
X_COLS = dfX.columns.tolist()

for col in X_COLS:
    # scikit-learn expects a 2-D feature matrix, hence the (n_rows, 1) reshape.
    X = dfX[col].to_numpy().reshape(-1, 1)
    model = LinearRegression().fit(X, y)
    y_pred = model.predict(X)
    bm_measures[col] = calculate_measures(y, y_pred)

In [198]:
# Block model: fit one multi-feature linear regression per named group of
# predictor columns and add its measures to `bm_measures`. Each model is
# evaluated on the same rows it was fitted on.
# Relies on `df` and `bm_measures` from the single-column block-model cell.

GOLD_COL = 'AU'
dfX, y = df.drop(GOLD_COL, axis=1), df[GOLD_COL]
X_COLS = dfX.columns.values.tolist()

# Label -> list of predictor columns fitted together.
sets = {
    'X-Y-Z': ['X', 'Y', 'Z'],
    'CU-MO': ['CU', 'MO'],
    'CU-AG-MO': ['CU', 'AG', 'MO'],
    'ALL': X_COLS,
}

# The loop variable is `cols` rather than `set`: notebook cells share one
# global namespace, so binding `set` here would shadow the builtin for every
# cell executed afterwards.
for name, cols in sets.items():
    X = dfX[cols]

    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    bm_measures[name] = calculate_measures(y, y_pred)

In [199]:
# Tabulate the block-model measures; transposed so that each feature or
# feature set is a row and each measure is a column.
mdf = pd.DataFrame(bm_measures)
mdf.T

Unnamed: 0,score,mse,rmse
X,0.002867,0.017285,0.131471
Y,9e-05,0.017333,0.131654
Z,0.034334,0.016739,0.12938
CU,0.166107,0.014455,0.120229
AG,0.073796,0.016055,0.126709
MO,0.006482,0.017222,0.131233
X-Y-Z,0.054019,0.016398,0.128055
CU-MO,0.276361,0.012544,0.111999
CU-AG-MO,0.297997,0.012169,0.110312
ALL,0.38275,0.0107,0.103439


In [200]:
# Inspect the assay column names; this cell only displays them (the suffix
# renaming was already applied in the earlier preprocessing cell).
assay.columns

Index(['SAMPFROM', 'SAMPTO', 'AG_PPM', 'AS_PPM', 'AU_PPM', 'BA_PPM', 'BI_PPM',
       'CD_PPM', 'CO_PPM', 'CR_PPM', 'CU_PCT', 'FE_PCT', 'K_PCT', 'MO_PCT',
       'NA_PCT', 'PB_PCT', 'S_PCT', 'SB_PPM'],
      dtype='object')

In [201]:
# Assay table: fit one single-feature linear regression per predictor column,
# using only rows with no missing value in any column.
as_measures = {}

df = assay.dropna(axis=0)
print(len(df))

GOLD_COL = 'AU_PPM'
dfX = df.drop(columns=GOLD_COL)
y = df[GOLD_COL]
X_COLS = dfX.columns.tolist()

for col in X_COLS:
    # scikit-learn expects a 2-D feature matrix, hence the (n_rows, 1) reshape.
    X = dfX[col].to_numpy().reshape(-1, 1)
    model = LinearRegression().fit(X, y)
    y_pred = model.predict(X)
    as_measures[col] = calculate_measures(y, y_pred)

2353


In [202]:
# Assay table: fit a single regression on all predictor columns together and
# add its measures to `as_measures` under the label 'ALL'.
# Relies on `dfX`, `y`, `X_COLS` and `as_measures` from the preceding cell.
sets = {
    'ALL': X_COLS,
}

# The loop variable is `cols` rather than `set`: notebook cells share one
# global namespace, so binding `set` here would shadow the builtin for every
# cell executed afterwards.
for name, cols in sets.items():
    X = dfX[cols]

    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    as_measures[name] = calculate_measures(y, y_pred)

In [203]:
# Tabulate the assay measures; transposed so that each feature or feature
# set is a row and each measure is a column.
mdf = pd.DataFrame(as_measures)
mdf.T

Unnamed: 0,score,mse,rmse
SAMPFROM,0.002908,2.667414,1.633222
SAMPTO,0.002932,2.667349,1.633202
AG_PPM,0.196132,2.1505,1.466458
AS_PPM,0.011688,2.643925,1.626015
BA_PPM,0.010716,2.646524,1.626814
BI_PPM,0.105422,2.393169,1.546987
CD_PPM,0.087405,2.441366,1.562487
CO_PPM,0.073972,2.477302,1.573945
CR_PPM,0.00301,2.667139,1.633138
CU_PCT,0.096285,2.417611,1.554867


In [206]:
# Assay table, reduced column subset: repeat the single-feature regressions
# after keeping only COLS, so that dropna discards a row only when one of
# these columns is missing.
as_measures = {}

COLS = ['SAMPFROM', 'SAMPTO', 'AG_PPM', 'AU_PPM', 'CU_PCT', 'MO_PCT']
df = assay[COLS].dropna(axis=0)
print(len(df))

GOLD_COL = 'AU_PPM'
dfX = df.drop(columns=GOLD_COL)
y = df[GOLD_COL]
X_COLS = dfX.columns.tolist()

for col in X_COLS:
    # scikit-learn expects a 2-D feature matrix, hence the (n_rows, 1) reshape.
    X = dfX[col].to_numpy().reshape(-1, 1)
    model = LinearRegression().fit(X, y)
    y_pred = model.predict(X)
    as_measures[col] = calculate_measures(y, y_pred)

9026


In [207]:
# Assay column subset: fit a single regression on all remaining predictor
# columns together and add its measures to `as_measures` under 'ALL'.
# Relies on `dfX`, `y`, `X_COLS` and `as_measures` from the preceding cell.
sets = {
    'ALL': X_COLS,
}

# The loop variable is `cols` rather than `set`: notebook cells share one
# global namespace, so binding `set` here would shadow the builtin for every
# cell executed afterwards.
for name, cols in sets.items():
    X = dfX[cols]

    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    as_measures[name] = calculate_measures(y, y_pred)

In [208]:
# Tabulate the measures for the reduced assay column subset; transposed so
# that each feature or feature set is a row and each measure is a column.
mdf = pd.DataFrame(as_measures)
mdf.T

Unnamed: 0,score,mse,rmse
SAMPFROM,0.003681,1.864359,1.365415
SAMPTO,0.003788,1.864159,1.365342
AG_PPM,0.000352,1.870589,1.367695
CU_PCT,0.083967,1.714125,1.309246
MO_PCT,0.005434,1.86108,1.364214
ALL,0.151635,1.587502,1.259961
