<a href="https://colab.research.google.com/github/imrchen/aprendiendo/blob/master/casa_template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# My Template

- actualizado en el <font size=6 color="firebrick">seis de agosto</font> de **<u>2021</u>**

## Most frequently used packages (imports) 

- [Pandas Options and settings](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html)
- [pandas.set_option](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html?highlight=set_option#pandas.set_option)
- [Customizing Matplotlib with style sheets and rcParams](https://matplotlib.org/stable/tutorials/introductory/customizing.html)
- [RcParams](https://matplotlib.org/stable/api/matplotlib_configuration_api.html#matplotlib.RcParams)
- [Formatting Plotly Axes](https://plotly.com/python/axes/)


In [None]:
!pip3 install --upgrade plotly statsmodels
!pip3 install kaleido

In [None]:
import os
import sys
import time
import itertools
import functools
import pandas as pd
import numpy as np
import sklearn
from psutil import virtual_memory
from pandas import DataFrame, Series
from typing import List, Dict
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import pytz
import datetime as dt
import requests
import gdown
from prettytable import from_csv
import sklearn
import gzip
import plotly.express as px
import plotly.graph_objects as go
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots


def timeit(func):
    """
    Decorator that reports how long each call to ``func`` takes.

    The wrapper forwards every argument to ``func``, prints the elapsed
    time measured with ``time.perf_counter`` and returns whatever
    ``func`` returned.
    """

    @functools.wraps(func)
    def _timeit_wrapper(*args, **kwargs):
        began = time.perf_counter()
        outcome = func(*args, **kwargs)
        elapsed = time.perf_counter() - began
        print(f"\nprocessing time of {func.__qualname__}(): {elapsed:.5f} seconds")
        return outcome

    return _timeit_wrapper


def print_now() -> None:
    """Print the current time expressed in the Asia/Taipei time zone."""
    taipei = pytz.timezone("Asia/Taipei")
    print("Local Time = ", dt.datetime.now(taipei))


def timed_message(*args, **kwargs):
    """Print a message exactly as ``print`` would, then the current time.

    All positional and keyword arguments are forwarded unchanged to
    ``print``; the time stamp is written afterwards by ``print_now``.
    """
    print(*args, **kwargs)
    print_now()


def stamped(func):
    """Decorator that prints an empty line and a time stamp before each call.

    The time stamp comes from ``print_now``; the wrapped function's
    arguments and return value pass through untouched.
    """

    @functools.wraps(func)
    def _stamped_call(*args, **kwargs):
        print("")
        print_now()
        return func(*args, **kwargs)

    return _stamped_call


@stamped
def tprintf(*args, **kw):
    """Forward all arguments to ``print``.

    Because of the ``stamped`` decorator, an empty line and the current
    time are printed before the message itself.
    """
    print(*args, **kw)


def find_runtime_info() -> None:
    """Print a short report about the machine and libraries in use.

    Reports the platform, the RAM size, and the Python and scikit-learn
    versions.  The RAM figure is ``virtual_memory().total`` in units of
    1e9 bytes, even though the message calls it "available".  If a
    global named ``tf`` exists, the TensorFlow version and the physical
    device list are printed as well; otherwise a notice is printed.
    """
    print(f'Your runtime is running on {sys.platform}')
    total_ram_gb = virtual_memory().total / 1e9
    print(f'.............has {total_ram_gb:.1f} gigabytes of available RAM\n')
    print(f'Python version = {sys.version}')
    print(f'scikit-learn version = {sklearn.__version__}')
    try:
        # ``tf`` only exists if the user imported TensorFlow beforehand;
        # the NameError raised otherwise is the "not imported" signal.
        print(f'TensorFlow version: {tf.__version__}')
        device_table = pd.DataFrame(tf.config.list_physical_devices())
        print('\nCPU/GPU resource list:\n')
        print(device_table.to_markdown())
    except NameError:
        print('TensorFlow not imported!')
    print('')


def determine_working_root(project_path: str) -> str:
    """Return the directory in which the project ``project_path`` lives.

    When ``google.colab`` can be imported, Google Drive is mounted at
    ``/content/drive`` and the result is located under
    ``/content/drive/My Drive/``.  Otherwise the result is located in a
    ``Google Drive`` folder under ``/mnt/hgfs/`` on Linux, or under the
    user's home directory on any other platform.
    """
    try:
        from google.colab import drive, files

        drive.mount("/content/drive")
    except ModuleNotFoundError:
        # Not running inside Colab: fall back to a local Google Drive folder.
        local_base = "/mnt/hgfs/" if sys.platform == "linux" else Path.home()
        return os.path.join(local_base, "Google Drive", project_path)

    return os.path.join("/content/drive/My Drive/", "", project_path)


def fetch_file_via_requests(url, save_in_dir):
    """Download ``url`` into ``save_in_dir`` and return the saved file's path.

    The local file name is the last ``/``-separated component of
    ``url``.  An HTTP error status raises through ``raise_for_status``.
    """
    target_path = os.path.join(save_in_dir, url.split("/")[-1])
    # stream=True: the body is consumed chunk by chunk in the loop below
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(target_path, "wb") as sink:
            for piece in response.iter_content(chunk_size=8192):
                if not piece:  # skip keep-alive chunks
                    continue
                sink.write(piece)
    return target_path


def dropbox_link(did, fname) -> str:
    """Build the ``dl.dropboxusercontent.com`` URL for share id ``did`` and file ``fname``."""
    return f"https://dl.dropboxusercontent.com/s/{did!s}/{fname!s}"


def gdrive_download_url(url: str) -> str:
    """Turn a Google Drive sharing link into a direct-download URL.

    The file id is taken to be the last path component in front of the
    final ``/view`` in ``url``.  When ``url`` contains no ``/view``, the
    last path component of the whole string is used instead.

    Args:
        url: Sharing link such as ``https://drive.google.com/file/d/<id>/view?...``.

    Returns:
        ``https://drive.google.com/u/1/uc?id=<id>&export=download``.
    """
    head, marker, _ = url.rpartition("/view")
    if not marker:
        # No "/view" suffix: keep the whole URL.  Slicing with the -1 that
        # str.rfind returns would silently drop the last character of the id.
        head = url
    download_id = head.rstrip("/").split("/")[-1]
    return f"https://drive.google.com/u/1/uc?id={download_id:s}&export=download"


def retrieve_gdrive_file(remote_loc: str, local_name: str) -> None:
    """Download the Google Drive file shared at ``remote_loc`` to ``local_name``.

    Args:
        remote_loc: Sharing link, converted with ``gdrive_download_url``.
        local_name: Output path handed to ``gdown.download``.
    """
    gdown.download(gdrive_download_url(remote_loc), local_name, quiet=False)


def ungzip(original, uncompressed):
    """Decompress the gzip file ``original`` into the file ``uncompressed``.

    Args:
        original: Path of the gzip-compressed input file.
        uncompressed: Path of the output file; it is overwritten.
    """
    # Imported locally: the notebook's import cell does not import shutil.
    import shutil

    with gzip.open(original, "rb") as f_in, open(uncompressed, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)


def print_csv(csv_name: str) -> None:
    """Print the CSV file ``csv_name`` as a table built by ``prettytable.from_csv``."""
    with open(csv_name) as csv_file:
        table = from_csv(csv_file)
    print(table)


def num_of_fwn(df):
    """Return how many columns of ``df`` contain at least one null value."""
    return df.isnull().any().sum()

def fwn(df: DataFrame) -> List:
    """Return the labels of the columns of ``df`` that hold a null value.

    The labels are listed in the order in which the columns appear.
    """
    has_null = df.isnull().any(axis=0)
    return [label for label, flagged in has_null.items() if flagged]

def missing_info(df: DataFrame) -> DataFrame:
    """Summarise every column of ``df`` that contains missing values.

    Args:
        df: Frame to inspect.

    Returns:
        A frame indexed by column label, ordered from the highest to the
        lowest missing rate, with the columns

        * ``missing_rate`` - share of null entries in percent (3 decimals),
        * ``missing`` - number of null entries,
        * ``non_empty`` - number of non-null entries,
        * ``most_common`` - count of the most frequent value (0 if none),
        * ``mode`` - first mode of the column (NaN if none),
        * ``unique`` - number of distinct non-null values,
        * ``dtype`` - dtype of the column.

        The result is empty when no column has missing values.
    """
    null_mask = df.isnull()
    rate = null_mask.mean()
    # Keep only columns that really have gaps, worst column first.
    rate = rate[rate > 0].sort_values(ascending=False)
    sorted_fwn = rate.index.tolist()

    missing = null_mask[sorted_fwn].sum()
    value_counts = [df[v].value_counts() for v in sorted_fwn]

    # mode() yields no rows when every selected column is entirely null
    # (and there is nothing to select when no column has gaps).
    modes = df[sorted_fwn].mode() if sorted_fwn else pd.DataFrame()
    if len(modes):
        first_mode = modes.iloc[0]
    else:
        first_mode = pd.Series(float("nan"), index=sorted_fwn)

    return pd.DataFrame(
        data={
            'missing_rate': (100 * rate).round(3),
            'missing': missing,
            'non_empty': len(df) - missing,
            'most_common': [c.iloc[0] if len(c) else 0 for c in value_counts],
            'mode': first_mode,
            'unique': [df[v].nunique() for v in sorted_fwn],
            # Must follow the sorted order of the rows, not the column order.
            'dtype': [df[v].dtype for v in sorted_fwn],
        }
    )


def ez_countplot(
    df: DataFrame, feature: str, width=600, height=560, font_size=16, template="seaborn"
) -> Figure:
    """Draw a bar chart of how often each value of ``df[feature]`` occurs.

    Each bar is labelled with its count and its share of all counted
    values in percent.  Bars are ordered by value and drawn on a
    categorical x axis.

    Args:
        df: Source data.
        feature: Name of the column to count.
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.
        font_size: Font size passed to the layout.
        template: Plotly template name; ``"gridon"`` is an alias for
            ``"plotly_white"``.

    Returns:
        The Plotly figure.
    """
    counts = df[feature].value_counts().sort_index()
    # pandas >= 2.0 names the value_counts() result "count" and puts the
    # feature name on the index.  Pin the layout this function relies on:
    # one column called ``feature`` and an unnamed index.
    d = counts.rename(feature).rename_axis(None).to_frame()
    if template == "gridon":
        template = "plotly_white"

    total = counts.sum()
    fig = px.bar(
        d,
        x=d.index,
        y=feature,
        color=feature,
        text=[f"{n:d}<br>{(n / total) * 100:.2f}%" for n in counts],
        color_continuous_scale=px.colors.sequential.RdBu_r,
    )
    # Treat the x values as labels even when they are numeric.
    fig.update_xaxes(type='category')
    fig.update_layout(
        template=template,
        width=width,
        height=height,
        font=dict(
            family="Source Hans TW, monospace", size=font_size, color="RebeccaPurple"
        ),
        xaxis_title=feature,
        yaxis_title="counts/ratiois",
    )
    return fig


def missing_values_chart(
    df: DataFrame, width=800, height=700, font_size=15, template="plotly"
) -> Figure:
    """Draw a horizontal bar chart of how completely each column is filled.

    The bar length is the share of non-null entries of the column.  Each
    bar is labelled ``<column> | <number of nulls> / <filled share in %>``.

    Args:
        df: Frame to inspect.
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.
        font_size: Font size passed to the layout.
        template: Plotly template name.

    Returns:
        The Plotly figure.
    """
    null_mask = df.isnull()
    filled_ratio = 1 - null_mask.mean()
    missing_count = null_mask.sum()
    d = filled_ratio.to_frame("filling")

    # The colour scale was referenced without ever being assigned (its
    # assignment was commented out), which raised NameError on every call.
    colors = px.colors.sequential.Redor

    fig = px.bar(
        d,
        x="filling",
        color=d.columns[0],
        orientation="h",
        text=[
            # !s instead of :s so that non-string column labels also work
            f"{label!s} | {n_missing:d} / {ratio * 100:.1f}%"
            for label, n_missing, ratio in zip(d.index, missing_count, filled_ratio)
        ],
        color_continuous_scale=colors,
    )

    fig.update_layout(
        width=width,
        height=height,
        template=template,
        yaxis_title="Features",
        font_size=font_size,
    )
    return fig


# Save the current matplotlib figure as ./<name>.png at 300 dpi.
output_fig = lambda name: plt.savefig(f"./{name}.png", dpi=300)

# Base URLs for raw files in two GitHub repositories.
REPO = "https://raw.githubusercontent.com/profundo-lab/bagel/main/"
REPO2 = "https://raw.githubusercontent.com/imrchen/aprendiendo/master/data/"

# Print the runtime report as soon as the helpers above are defined.
find_runtime_info()

# Time-stamped confirmation that this cell ran to the end.
tprintf("lección uno: helper functions loaded")


Your runtime is running on linux
.............has 13.6 gigabytes of available RAM

Python version = 3.7.11 (default, Jul  3 2021, 18:01:19) 
[GCC 7.5.0]
scikit-learn version = 0.22.2.post1
TensorFlow not imported!


Local Time =  2021-08-06 13:19:13.100518+08:00
lección uno: helper functions loaded


In [None]:
def find_runtime_info() -> None:
    """Print a short report about the machine and libraries in use.

    Reports the platform, the RAM size, and the Python and scikit-learn
    versions.  The RAM figure is ``virtual_memory().total`` in units of
    1e9 bytes, even though the message calls it "available".  If a
    global named ``tf`` exists, the TensorFlow version and the physical
    device list are printed as well; otherwise a notice is printed.
    """
    print(f'Your runtime is running on {sys.platform}')
    total_ram_gb = virtual_memory().total / 1e9
    print(f'.............has {total_ram_gb:.1f} gigabytes of available RAM\n')
    print(f'Python version = {sys.version}')
    print(f'scikit-learn version = {sklearn.__version__}')
    try:
        # ``tf`` only exists if the user imported TensorFlow beforehand;
        # the NameError raised otherwise is the "not imported" signal.
        print(f'TensorFlow version: {tf.__version__}')
        device_table = pd.DataFrame(tf.config.list_physical_devices())
        print('\nCPU/GPU resource list:\n')
        print(device_table.to_markdown())
    except NameError:
        print('TensorFlow not imported!')
    print('')


Your runtime is running on linux
.............has 13.6 gigabytes of available RAM

Python version = 3.7.11 (default, Jul  3 2021, 18:01:19) 
[GCC 7.5.0]
scikit-learn version = 0.22.2.post1
TensorFlow not imported!


In [None]:
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [None]:
!free -h --si | awk  '/Mem:/{print $2}'

13G


In [None]:
!lscpu | grep 'CPU(s)'

CPU(s):              2
On-line CPU(s) list: 0,1
NUMA node0 CPU(s):   0,1


In [None]:
!lscpu 

Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              2
On-line CPU(s) list: 0,1
Thread(s) per core:  2
Core(s) per socket:  1
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
CPU family:          6
Model:               63
Model name:          Intel(R) Xeon(R) CPU @ 2.30GHz
Stepping:            0
CPU MHz:             2299.998
BogoMIPS:            4599.99
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            256K
L3 cache:            46080K
NUMA node0 CPU(s):   0,1
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single ssbd ibrs 

## Directory Setup

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

# Folder (relative to the working root's base) and sub-project name.
project_root = 'aprendizaje'
project = 'contribution'

# Resolve the base directory, then derive the per-project
# data / figure / prediction locations from it.
working_root = determine_working_root(project_root)
data_dir = os.path.join(working_root, 'data', project)
figure_dir = os.path.join(working_root, 'figure', project)
predict_dir = os.path.join(working_root, 'prediction', project)
train_source = os.path.join(data_dir, 'train.csv')
test_source = os.path.join(data_dir, 'test.csv')

# Save the current matplotlib figure as <figure_dir>/<name>.png at 300 dpi.
output_fig = lambda name: plt.savefig(
    os.path.join(figure_dir, name + '.png'),
    dpi=300
)
print(f'project code -> {project}')
print(f'data storage location -> {data_dir}')
# Empty message: prints a blank line followed by the time stamp.
timed_message('')

## 載入中文字型

In [None]:
%%time 

import os
import sys
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import requests
from matplotlib.font_manager import FontProperties


def determine_font_dest() -> str:
    """Return the ``mpl-data/fonts/ttf`` directory of the imported matplotlib.

    The directory is located next to matplotlib's ``__init__`` file.
    The path is assembled with ``os.path`` so that it is also valid on
    platforms whose path separator is not ``/``.
    """
    return os.path.join(os.path.dirname(mpl.__file__), 'mpl-data', 'fonts', 'ttf')


# 載入中文字型
#

# File names of the fonts; the trailing comments show the rcParams line
# that selects each font.
lanting_source = 'LanTingSong.ttf'           # plt.rcParams['font.sans-serif'] = ['FZLanTingSong']
hans_source = 'SourceHanSerifTW-Regular.ttf' # plt.rcParams['font.sans-serif'] = ['Source Han Serif TW']
hei_source = 'TaipeiSansTCBeta-Regular.ttf'  # plt.rcParams['font.sans-serif'] = ['Taipei Sans TC Beta']
genryu_source = 'GenRyuMin-R.ttc'            # plt.rcParams['font.sans-serif'] = ['GenRyuMin TW']

# Fonts are downloaded into matplotlib's own ttf directory.
font_dest = determine_font_dest()

# [Dropbox share id, file name] pairs, consumed by dropbox_link() below.
fonts = [
    ['vj0eq732aq36tto', 'SourceHanSerifTW-Regular.ttf'],
    ['t38q9m4gdaco9qz', 'LanTingSong.ttf'],
    ['w7yvxtou1x1dmkx', 'GenRyuMin-R.ttc'],
    ['1nm46fy1sahq1i5', 'TaipeiSansTCBeta-Regular.ttf']
]

# Download every font on each run of the cell.
for each in fonts:
    fetch_file_via_requests(dropbox_link(each[0], each[1]), font_dest)

# FontProperties handles for three of the downloaded fonts.
# NOTE(review): no handle is created for lanting_source in this cell, and
# the last name is spelled "genruy" (not "genryu") - confirm both are intended.
song = FontProperties(fname=os.path.join(font_dest, hans_source))
hei = FontProperties(fname=os.path.join(font_dest, hei_source))
genruy = FontProperties(fname=os.path.join(font_dest, genryu_source))

tprintf('載入中文字型')


Local Time =  2021-07-21 18:46:37.097162+08:00
載入中文字型
CPU times: user 299 ms, sys: 229 ms, total: 528 ms
Wall time: 4.76 s


## Missing Values

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go



def num_of_fwn(df):
    """Return how many columns of ``df`` contain at least one null value."""
    return df.isnull().any().sum()

def fwn(df: DataFrame) -> List:
    """Return the labels of the columns of ``df`` that hold a null value.

    The labels are listed in the order in which the columns appear.
    """
    has_null = df.isnull().any(axis=0)
    return [label for label, flagged in has_null.items() if flagged]

def missing_info(df: DataFrame) -> DataFrame:
    """Summarise every column of ``df`` that contains missing values.

    Args:
        df: Frame to inspect.

    Returns:
        A frame indexed by column label, ordered from the highest to the
        lowest missing rate, with the columns

        * ``missing_rate`` - share of null entries in percent (3 decimals),
        * ``missing`` - number of null entries,
        * ``non_empty`` - number of non-null entries,
        * ``most_common`` - count of the most frequent value (0 if none),
        * ``mode`` - first mode of the column (NaN if none),
        * ``unique`` - number of distinct non-null values,
        * ``dtype`` - dtype of the column.

        The result is empty when no column has missing values.
    """
    null_mask = df.isnull()
    rate = null_mask.mean()
    # Keep only columns that really have gaps, worst column first.
    rate = rate[rate > 0].sort_values(ascending=False)
    sorted_fwn = rate.index.tolist()

    missing = null_mask[sorted_fwn].sum()
    value_counts = [df[v].value_counts() for v in sorted_fwn]

    # mode() yields no rows when every selected column is entirely null
    # (and there is nothing to select when no column has gaps).
    modes = df[sorted_fwn].mode() if sorted_fwn else pd.DataFrame()
    if len(modes):
        first_mode = modes.iloc[0]
    else:
        first_mode = pd.Series(float("nan"), index=sorted_fwn)

    return pd.DataFrame(
        data={
            'missing_rate': (100 * rate).round(3),
            'missing': missing,
            'non_empty': len(df) - missing,
            'most_common': [c.iloc[0] if len(c) else 0 for c in value_counts],
            'mode': first_mode,
            'unique': [df[v].nunique() for v in sorted_fwn],
            # Must follow the sorted order of the rows, not the column order.
            'dtype': [df[v].dtype for v in sorted_fwn],
        }
    )

## Regression Utilities

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import r2_score
import numpy as np
import statsmodels.api as sm
from numpy import ndarray


def ols_fit(x_vector: np.ndarray, y_vector: np.ndarray, *args, **kw):
    """Fit an ordinary-least-squares model of ``y_vector`` on ``x_vector``.

    A constant column is added to ``x_vector`` with ``sm.add_constant``
    before fitting.

    Args:
        x_vector: Explanatory data.
        y_vector: Response data.
        *args: Extra positional arguments for ``sm.OLS`` (after
            ``endog`` and ``exog``).
        **kw: Extra keyword arguments for ``sm.OLS``.

    Returns:
        The result of ``sm.OLS(...).fit()``.
    """
    x_vec = sm.add_constant(x_vector)
    # endog/exog are passed positionally: giving them as keywords while
    # also unpacking *args makes any extra positional argument collide
    # with ``endog`` and raise TypeError.
    return sm.OLS(y_vector, x_vec, *args, **kw).fit()


def polynomial_approx(predictor: ndarray, target: ndarray, degree: int, *args, **kwargs):
    """Fit ``target`` by OLS on polynomial features of ``predictor``.

    Args:
        predictor: Explanatory data; a 1-D array is treated as a single
            feature and reshaped to one column.
        target: Response data.
        degree: Degree handed to ``PolynomialFeatures``.
        *args: Extra positional arguments for ``sm.OLS``.
        **kwargs: Extra keyword arguments for ``sm.OLS``.

    Returns:
        The result of ``sm.OLS(...).fit()``.
    """
    # Arrays that are not 1-D are used as they are; previously ``x`` was
    # left unassigned for them and the next line raised UnboundLocalError.
    x = predictor.reshape(-1, 1) if predictor.ndim == 1 else predictor
    poly_features = PolynomialFeatures(degree).fit_transform(x)
    return sm.OLS(target, poly_features, *args, **kwargs).fit()


def polyfit_wrapper(xvec, yvec, degree = 3, xdomain = None):
    """Fit a polynomial with ``np.polyfit`` and evaluate it on ``xdomain``.

    Both inputs are flattened before fitting.  When ``xdomain`` is not
    given, 100 evenly spaced points between the smallest and the largest
    value of ``xvec`` are used.

    Returns:
        A one-element tuple holding the evaluated polynomial values.
    """
    flat_x = xvec.flatten()
    coefficients = np.polyfit(flat_x, yvec.flatten(), degree)
    if xdomain is None:
        xdomain = np.linspace(min(flat_x), max(flat_x), 100)

    fitted = np.polyval(coefficients, xdomain)
    return (fitted,)

def polynomial_approximation(x, y, deg, x_domain=None):
    """Fit a polynomial regression pipeline and predict on ``x_domain``.

    The pipeline is ``PolynomialFeatures(deg)`` followed by
    ``LinearRegression``.

    Args:
        x: Explanatory data; a 1-D array is turned into one column.
        y: Response data.
        deg: Polynomial degree.
        x_domain: Points to predict at; defaults to ``x``.  A 1-D
            sequence is turned into one column in the same way as ``x``.

    Returns:
        ``(model, predictions on x_domain, R^2 of the fit on x)``.
    """
    the_model = make_pipeline(PolynomialFeatures(deg), LinearRegression())
    if x.ndim == 1:
        x = x[:, np.newaxis]
    the_model.fit(x, y)

    if x_domain is None:
        x_domain = x
    elif np.ndim(x_domain) == 1:
        # Give a 1-D domain the same single-column layout as ``x``;
        # the estimator's predict() expects 2-D input.
        x_domain = np.asarray(x_domain).reshape(-1, 1)

    predicted_values = the_model.predict(x_domain)
    return the_model, predicted_values, r2_score(y, the_model.predict(x))

## Confusion Matrix

- [Scikit-learn: How to obtain True Positive, True Negative, False Positive and False Negative](https://stackoverflow.com/questions/31324218/scikit-learn-how-to-obtain-true-positive-true-negative-false-positive-and-fa)

![](https://i.stack.imgur.com/AuTKP.png)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import ndarray
from sklearn.metrics import confusion_matrix
from matplotlib.axes import Axes
import seaborn as sns


def confusion_counters(y_true: ndarray, y_pred: ndarray) -> ndarray:
    """Compute per-class TP, FP, FN and TN counts from a confusion matrix.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.

    Returns:
        Array of shape ``(4, number of classes)`` whose rows are, in
        order, TP, FP, FN and TN; column ``k`` belongs to the ``k``-th
        class of ``confusion_matrix(y_true, y_pred)``.
    """
    cm = confusion_matrix(y_true, y_pred)

    TP = np.diag(cm)
    FP = cm.sum(axis=0) - TP  # column total minus the diagonal entry
    FN = cm.sum(axis=1) - TP  # row total minus the diagonal entry
    TN = cm.sum() - (FP + FN + TP)

    # Rates that can be derived from these counters:
    #   TPR (recall) = TP/(TP+FN)    TNR (specificity) = TN/(TN+FP)
    #   PPV (precision) = TP/(TP+FP) NPV = TN/(TN+FN)
    #   FPR = FP/(FP+TN)   FNR = FN/(TP+FN)   FDR = FP/(TP+FP)

    # One row per counter, one column per class.
    return np.array([TP, FP, FN, TN])


def class_confusion_matrix(y_true, y_pred, class_pos=1) -> ndarray:
    """Return the 2x2 counter matrix ``[[TP, FP], [FN, TN]]`` of one class.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        class_pos: Position of the class in the columns returned by
            ``confusion_counters``.

    Returns:
        Array of shape ``(2, 2)``.
    """
    cm_data = confusion_counters(y_true, y_pred)
    # A class always has exactly four counters, so the target shape is
    # (2, 2) however many classes there are; reshaping to
    # (num_classes, num_classes) only worked for two classes.
    return cm_data[:, class_pos].reshape(2, 2)


def plot_multilabel_cm(y_true, y_pred, classes="auto", figsize=(6, 5)) -> Axes:
    """Plot the transposed confusion matrix as an annotated heat map.

    The matrix from ``confusion_matrix(y_true, y_pred)`` is transposed
    before plotting; the x axis is labelled "Actual Classes" and the
    y axis "Predicted Classes".  ``classes`` is used for the tick labels
    of both axes.

    Returns:
        The matplotlib axes holding the heat map.
    """
    transposed = np.transpose(confusion_matrix(y_true, y_pred)).copy()

    _, ax = plt.subplots(figsize=figsize, dpi=150)
    sns.heatmap(
        transposed,
        annot=True,
        fmt='d',
        xticklabels=classes,
        yticklabels=classes,
        ax=ax,
    )
    ax.set_xlabel("Actual Classes")
    ax.set_ylabel("Predicted Classes")
    return ax


tprintf("confusion matrix tools loaded")

Local Time =  2021-07-23 12:53:13.319303+08:00
confusion matrix tools loaded


## comparar models

In [None]:
from numpy import ndarray
from pandas import DataFrame
from sklearn.utils import resample
import pandas as pd
import numpy as np
from pandas import DataFrame
from numpy import ndarray
from collections import defaultdict
import itertools
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score
from imblearn.pipeline import make_pipeline


def resample_data(original_data: DataFrame, feature: str, ratio=1.) -> DataFrame:
    """Resample the second most frequent class of ``feature`` with replacement.

    The rows of the most frequent value of ``feature`` are kept as they
    are.  The rows of the second most frequent value are resampled with
    replacement (``random_state=0``) to
    ``int(majority count * ratio)`` rows.  Rows of any other value are
    not part of the result.

    Args:
        original_data: Source data.
        feature: Column holding the class labels.
        ratio: Size of the resampled class relative to the majority class.

    Returns:
        Majority rows followed by the resampled rows, with a fresh index.

    Raises:
        IndexError: If ``feature`` has fewer than two distinct values.
    """
    counts = original_data[feature].value_counts().sort_values(ascending=False)
    major_item, minor_item = counts.index[0], counts.index[1]
    # .iloc: plain [0] is a *label* lookup when the class labels are
    # integers, which returned the count of class 0 rather than the
    # count of the majority class.
    mode_count = counts.iloc[0]
    resample_count = int(mode_count * ratio)

    major_rows = original_data[original_data[feature] == major_item]
    minor_rows = original_data[original_data[feature] == minor_item]

    df_up = resample(
        minor_rows,
        replace=True,
        n_samples=resample_count,
        random_state=0)

    return pd.concat([major_rows, df_up], axis=0, ignore_index=True)

    
@timeit
def analizar_model(
    estimator, train_set: DataFrame, test_set: DataFrame, target: str
) -> ndarray:
    """Fit ``estimator`` on ``train_set`` and score it on ``test_set``.

    Args:
        estimator: Object with ``fit`` and ``predict`` methods.
        train_set: Training data including the ``target`` column.
        test_set: Test data including the ``target`` column.
        target: Name of the label column.

    Returns:
        The output of ``precision_recall_fscore_support`` on the test
        set, flattened into an array of shape ``(1, -1)``.
    """
    # Imported locally: the cell defining this helper does not import it.
    from sklearn.metrics import precision_recall_fscore_support

    train_x = train_set.drop([target], axis=1)
    # Use the column named by ``target``; it used to be hard-coded as
    # ``.response``, which ignored the parameter.
    train_y = train_set[target]
    test_x = test_set.drop([target], axis=1)
    test_y = test_set[target]

    estimator.fit(train_x, train_y)
    y_pred = estimator.predict(test_x)
    return np.array(precision_recall_fscore_support(test_y, y_pred)).reshape(1, -1)


@timeit
def comparar_models(
    model_collections: dict, train_data, train_labels, k=5, scoring="accuracy"
) -> DataFrame:
    """Cross-validate every model of ``model_collections`` on the same folds.

    Args:
        model_collections: Mapping of model name to estimator.
        train_data: Feature data.
        train_labels: Labels.
        k: Number of stratified, shuffled folds (``random_state=0``).
        scoring: Scoring name handed to ``cross_val_score``.

    Returns:
        Frame indexed by model name (index name ``"model"``) with one
        column per fold, named ``"1"`` .. ``str(k)``.
    """
    seed = 0
    data_fold = StratifiedKFold(n_splits=k, random_state=seed, shuffle=True)

    results = [
        cross_val_score(
            model,
            train_data,
            train_labels,
            cv=data_fold,
            scoring=scoring,
        )
        for model in model_collections.values()
    ]

    # Column names come from ``k`` rather than from the last loop result,
    # so an empty collection yields an empty frame instead of NameError.
    return pd.DataFrame(
        data=results,
        index=pd.Index(list(model_collections), name="model"),
        columns=[str(fold) for fold in range(1, k + 1)],
    )


@timeit
def comparar_recall(
    model_collections: dict,
    train_data: DataFrame,
    train_labels: DataFrame,
    test_data: DataFrame,
    test_labels: DataFrame,
    scoring="accuracy",
) -> DataFrame:
    """Fit every model and tabulate accuracy, precision, recall and F-score.

    The nine fixed column names assume exactly two classes.

    Args:
        model_collections: Mapping of model name to estimator.
        train_data: Training features.
        train_labels: Training labels.
        test_data: Test features.
        test_labels: Test labels.
        scoring: Not used by the function body; kept for callers that
            pass it.

    Returns:
        Frame indexed by model name (index name ``"model"``) with the
        columns ``accuracy`` (test set), ``p(0)``/``p(1)`` (test
        precision), ``r(0)``/``r(1)`` (training recall),
        ``rt(0)``/``rt(1)`` (test recall) and ``f(0)``/``f(1)`` (test
        F-score).
    """
    # Imported locally: the cell defining this helper does not import them.
    from sklearn.metrics import accuracy_score, precision_recall_fscore_support

    results = []

    for model in model_collections.values():
        model.fit(train_data, train_labels)

        y_train_pred = model.predict(train_data)
        _, rtrain, _, _ = precision_recall_fscore_support(
            train_labels, y_train_pred, zero_division=1
        )
        y_pred = model.predict(test_data)
        p, rtest, f, _ = precision_recall_fscore_support(
            test_labels, y_pred, zero_division=1
        )
        acc = accuracy_score(test_labels, y_pred)

        # One flat row per model: accuracy followed by the per-class scores.
        results.append(list(itertools.chain([acc], p, rtrain, rtest, f)))

    return pd.DataFrame(
        data=results,
        index=pd.Index(list(model_collections), name="model"),
        columns=[
            "accuracy",
            "p(0)",
            "p(1)",
            "r(0)",
            "r(1)",
            "rt(0)",
            "rt(1)",
            "f(0)",
            "f(1)",
        ],
    )

tprintf("comparar_model defined")


Local Time =  2021-07-22 18:15:01.328080+08:00
comparar_model defined



The module is deprecated in version 0.21 and will be removed in version 0.23 since we've dropped support for Python 2.7. Please rely on the official version of six (https://pypi.org/project/six/).


The sklearn.neighbors.base module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.neighbors. Anything that cannot be imported from sklearn.neighbors is now part of the private API.



## 如何下載 Colab VM 硬碟檔案

In [None]:
import os
from google.colab import files

# Download every entry of the current directory that is not a directory
# and whose name contains '.png' (anywhere in the name, not only as suffix).
for f in os.listdir('./'):
    if os.path.isdir(f) or f.rfind('.png') < 0:
        continue
    files.download(f)

## Available scoring metrics


dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_weighted'])

In [None]:
# Print the name of every scorer registered in sklearn.metrics.SCORERS,
# one per line.
# NOTE(review): newer scikit-learn releases provide
# sklearn.metrics.get_scorer_names() for this - confirm against the
# installed version before relying on SCORERS.
import sklearn.metrics
for x in sklearn.metrics.SCORERS.keys():
    print(x)

In [None]:
sklearn.metrics.SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_weighted'])

## GPU info

In [None]:
# Capture the output lines of the nvidia-smi shell command (IPython syntax).
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The word "failed" in the output is taken to mean that no GPU is usable.
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, 
and then re-execute this cell.


# ✅ Here we go....

# End of Library

## 回收暫存區

In [None]:
# Pick CUDA when it is available, otherwise fall back to the CPU.
# NOTE(review): torch is not imported in this cell; it has to be in
# scope already (a later cell imports it) - confirm the execution order.
device = torch.device(
    'cuda' if torch.cuda.is_available() else 'cpu'
)
print('Torch Device set to -->', device)

# Additional info when using CUDA: device name and memory figures of
# device 0, converted from bytes to GiB (1024**3) and rounded to 1 decimal.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Torch Device set to --> cpu


In [None]:
# Additional info when using CUDA: device name and memory figures of
# device 0, converted from bytes to GiB (1024**3) and rounded to 1 decimal.
# Relies on `device` and `torch` being defined before this cell runs.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

In [None]:
import numpy as np
import pandas as pd
import sklearn
import seaborn as sns
import statsmodels
import torch
import torchvision
import mlxtend
import lightgbm
import xgboost

# Print the installed version of each library imported above.
print(f'numpy version = {np.__version__}')
print(f'pandas version = {pd.__version__}')
print(f'scikit-learn version = {sklearn.__version__}')
print(f'mlxtend version = {mlxtend.__version__}')
print(f'xgboost version = {xgboost.__version__}')
print(f'lightgbm version = {lightgbm.__version__}')
print(f'statsmodel version = {statsmodels.__version__}')
print(f'PyTorch version = {torch.__version__}')
print(f'TorchVision version = {torchvision.__version__}')

numpy version = 1.19.5
pandas version = 1.1.5
scikit-learn version = 0.22.2.post1
mlxtend version = 0.14.0
xgboost version = 0.90
lightgbm version = 2.2.3
statsmodel version = 0.10.2
PyTorch version = 1.9.0+cu102
TorchVision version = 0.10.0+cu102


In [None]:
import os
# Print the full path of every file found below /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# End......