## rpy2 3.3.2: Basic Usage
---

##### Reference: https://rpy2.github.io/doc/v3.0.x/html/generated_rst/pandas.html

### Loading Required libraries

In [1]:
import numpy as np
import pandas as pd
import rpy2.robjects as ro

from rpy2.robjects import r, pandas2ri
from rpy2.robjects.packages import importr
from rpy2.robjects.conversion import localconverter

try:
    import cPickle as pickle
except ModuleNotFoundError:
    import pickle

### Importing R packages

In [2]:
lib = importr('TSPred')

### Converting DataFrames into R objects

In [3]:
pd_df = pd.DataFrame({'int_values': [1,2,3],
                      'str_values': ['abc', 'def', 'ghi']})

pd_df

Unnamed: 0,int_values,str_values
0,1,abc
1,2,def
2,3,ghi


R data frame converted from a pandas data frame:

In [5]:
# Convert the pandas frame to an R object inside a local conversion context
# that adds the pandas rules to rpy2's default converter.
with localconverter(ro.default_converter + pandas2ri.converter):
  r_from_pd_df = ro.conversion.py2rpy(pd_df)

# Bare expression so the notebook displays the converted object.
r_from_pd_df

int_values,str_values
...,...


The conversion also happens automatically when an R function is called inside the converter context. For example, calling the R function `base::summary` on the pandas data frame:

In [5]:
# Handle to R's `base` package; its functions become Python attributes.
base = importr('base')

# The pandas frame is passed straight to the R function; the conversion is
# done by the converter active inside this context.
with localconverter(ro.default_converter + pandas2ri.converter):
  df_summary = base.summary(pd_df)

print(df_summary)

['Min.   :1.0  ' '1st Qu.:1.5  ' 'Median :2.0  ' 'Mean   :2.0  '
 '3rd Qu.:2.5  ' 'Max.   :3.0  ' 'Length:3          ' 'Class :character  '
 'Mode  :character  ' NA_character_ NA_character_ NA_character_]


### Transferring R data sets into Python

By activating the pandas conversion of rpy2

In [6]:
# Switch the pandas conversion on for the calls below; the notebook switches
# it off again with pandas2ri.deactivate() in the following cell.
pandas2ri.activate()

# Load R's `iris` data set and look it up by name in the R session.
r.data('iris')
r['iris'].head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa


In [7]:
pandas2ri.deactivate()

Alternatively, build the R data frame explicitly:

In [8]:
# Build an R data frame directly from rpy2 vectors: one integer vector and
# one string vector, keyed by column name.
r_df = ro.DataFrame({'int_values': ro.IntVector([1,2,3]),
                     'str_values': ro.StrVector(['abc', 'def', 'ghi'])})

# Bare expression so the notebook displays the R object.
r_df

int_values,str_values
...,...


It can be converted to a pandas data frame using the same converter:

In [9]:
# Reverse direction: turn the R data frame into a Python object using the
# default + pandas conversion rules.
with localconverter(ro.default_converter + pandas2ri.converter):
  pd_from_r_df = ro.conversion.rpy2py(r_df)

pd_from_r_df

Unnamed: 0,int_values,str_values
1,1,abc
2,2,def
3,3,ghi


## Further Examples
---

### Defining auxiliary functions

In [3]:
def r2py(r_df):
    """Convert an R object to its Python counterpart.

    The conversion runs ``ro.conversion.rpy2py`` inside a ``localconverter``
    context built from rpy2's default converter plus the pandas converter.

    Parameters
    ----------
    r_df : object
        The R-side object to convert.

    Returns
    -------
    object
        Whatever ``ro.conversion.rpy2py`` returns for ``r_df``.
    """
    pandas_rules = ro.default_converter + pandas2ri.converter
    with localconverter(pandas_rules):
        return ro.conversion.rpy2py(r_df)

def py2r(pd_df):
    """Convert a Python/pandas object to its R counterpart.

    The conversion runs ``ro.conversion.py2rpy`` inside a ``localconverter``
    context built from rpy2's default converter plus the pandas converter.

    Parameters
    ----------
    pd_df : object
        The Python-side object to convert.

    Returns
    -------
    object
        Whatever ``ro.conversion.py2rpy`` returns for ``pd_df``.
    """
    pandas_rules = ro.default_converter + pandas2ri.converter
    with localconverter(pandas_rules):
        return ro.conversion.py2rpy(pd_df)

def LV2dict(lv):
    """Map the element names of an R list vector to its elements.

    Parameters
    ----------
    lv : ro.vectors.ListVector
        The list vector to unpack. Subclasses of ``ListVector`` are accepted.

    Returns
    -------
    dict
        ``{name: element}`` built by pairing ``lv.names`` with the elements of
        ``lv`` in order. Elements are returned as-is (not converted).

    Raises
    ------
    TypeError
        If ``lv`` is not a ``ListVector``. Previously this case fell through
        to an unbound local variable and surfaced as ``UnboundLocalError``.
    """
    if not isinstance(lv, ro.vectors.ListVector):
        raise TypeError(
            "LV2dict expects an rpy2 ListVector, got %s" % type(lv).__name__
        )
    return dict(zip(lv.names, list(lv)))

def lv2py(lv, idx=()):
    """Extract part of a (possibly nested) R list vector as a Python object.

    Parameters
    ----------
    lv : object
        The R list vector to read from; it is handed to ``LV2dict`` at every
        level that is traversed.
    idx : sequence of str, optional
        Path of element names to follow, outermost first. Defaults to an empty
        path. (The default is an immutable tuple rather than a shared list.)

    Returns
    -------
    object
        With an empty ``idx``: the dict produced by ``LV2dict(lv)``, whose
        values are left unconverted. Otherwise: the element reached by
        following ``idx``, passed through ``r2py``.

    Raises
    ------
    KeyError
        If a name in ``idx`` is not present at its level.
    """
    if len(idx) == 0:
        return LV2dict(lv)

    # Walk down one level per name; only the final element is converted.
    node = lv
    for name in idx:
        node = LV2dict(node)[name]
    return r2py(node)

def applyRfunc(pd_df, rfunc, par=()):
    """Call an R function with pandas conversion enabled.

    Parameters
    ----------
    pd_df : object
        First positional argument passed to ``rfunc``.
    rfunc : callable
        The function to call (e.g. an object looked up through ``r[...]``).
    par : sequence, optional
        Extra positional arguments appended after ``pd_df``. Defaults to no
        extra arguments. (The default is an immutable tuple rather than a
        shared list.)

    Returns
    -------
    object
        Whatever ``rfunc(pd_df, *par)`` returns when called inside a
        ``localconverter`` context built from rpy2's default converter plus
        the pandas converter.
    """
    with localconverter(ro.default_converter + pandas2ri.converter):
        return rfunc(pd_df, *par)


def save_object(obj, filename):
    """Pickle ``obj`` to ``filename`` with the highest available protocol.

    The file is opened in binary write mode, so any existing file at that
    path is overwritten.
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)

def load_object(obj, filename=None):
    """Load and return a pickled object from a file.

    Parameters
    ----------
    obj : object
        Kept for backward compatibility with the two-argument call
        ``load_object(placeholder, filename)``, where its value is ignored.
        When ``filename`` is omitted, ``obj`` is taken to be the path instead,
        so ``load_object(path)`` also works.
    filename : str or path-like, optional
        Path of the pickle file to read.

    Returns
    -------
    object
        The object unpickled from the file.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while unpickling; only load
    files that come from a trusted source.
    """
    if filename is None:
        filename = obj
    # `source` rather than `input`, to avoid shadowing the builtin.
    with open(filename, 'rb') as source:
        return pickle.load(source)

### Loading functions from .R

In [4]:
# Source two R scripts directly from the `master` branch of a GitHub
# repository. This needs network access, and the code that runs can change
# whenever that branch changes.
r.source("https://raw.githubusercontent.com/eogasawara/mylibrary/master/myPreprocessing.R")
r.source("https://raw.githubusercontent.com/eogasawara/mylibrary/master/myPrediction.R")

R[write to console]: Loading required package: ggplot2

R[write to console]: Learn more about the underlying theory at https://ggplot2-book.org/

R[write to console]: Loading required package: scales

R[write to console]: Loading required package: ggpubr

R[write to console]: Loading required package: magrittr

R[write to console]: Loading required package: reshape

R[write to console]: Loading required package: caret

R[write to console]: Loading required package: lattice

R[write to console]: Loading required package: MASS

R[write to console]: Loading required package: DMwR

R[write to console]: Loading required package: grid

R[write to console]: Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 

R[write to console]: Loading required package: dplyr

R[write to console]: 
Attaching package: ‘dplyr’


R[write to console]: The following object is masked from ‘package:MASS’:

    select


R[write to console]: The following object is maske

0,1
value,[RTYPES.CLOSXP]
visible,[RTYPES.LGLSXP]


### Loading Iris dataset

#### From scikit-learn:

In [73]:
from sklearn import datasets

iris = datasets.load_iris()

Converting to pandas DataFrame

In [74]:
# Build a frame from the scikit-learn bunch: feature columns first, then the
# numeric target, then replace each target code with its class name.
# NOTE(review): the label column here is `Species` (capitalised), while later
# cells read `iris_df.species`; the seaborn cell reassigns `iris_df` afterwards.
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df['Species'] = iris.target
iris_df['Species'] = iris_df.Species.replace(dict(enumerate(iris.target_names)))

#### From seaborn:

In [5]:
import seaborn as sns
iris_df = sns.load_dataset('iris')

### Initial data exploration

In [6]:
iris_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


#### Summary from Python

In [107]:
print(iris_df.shape)

iris_df.describe()

(150, 5)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


#### Summary from R

In [120]:
base = importr('base')

applyRfunc(iris_df,base.summary)

array(['Min.   :4.300  ', '1st Qu.:5.100  ', 'Median :5.800  ',
       'Mean   :5.843  ', '3rd Qu.:6.400  ', 'Max.   :7.900  ',
       'Min.   :2.000  ', '1st Qu.:2.800  ', 'Median :3.000  ',
       'Mean   :3.057  ', '3rd Qu.:3.300  ', 'Max.   :4.400  ',
       'Min.   :1.000  ', '1st Qu.:1.600  ', 'Median :4.350  ',
       'Mean   :3.758  ', '3rd Qu.:5.100  ', 'Max.   :6.900  ',
       'Min.   :0.100  ', '1st Qu.:0.300  ', 'Median :1.300  ',
       'Mean   :1.199  ', '3rd Qu.:1.800  ', 'Max.   :2.500  ',
       'Length:150        ', 'Class :character  ', 'Mode  :character  ',
       NA_character_, NA_character_, NA_character_], dtype=object)

### Building samples

In [7]:
# Split the data through the R function `sample.random`, passing 0.8 as its
# extra argument; the result is indexed positionally below.
# NOTE(review): no seed is set in this cell, so the split may differ between
# runs - confirm whether `sample.random` seeds internally.
iris_tt = applyRfunc(iris_df,r['sample.random'],par=[0.8])

# Element 0 is used as the training part, element 1 as the test part.
iris_train = r2py(iris_tt[0])
iris_test = r2py(iris_tt[1])

print("Dim train:",iris_train.shape)
print("Dim test: ",iris_test.shape)

Dim train: (120, 5)
Dim test:  (30, 5)


In [9]:
# One-row frames holding the per-class counts of the full data set and of
# each split (value_counts gives a Series; .T turns it into a single row).
tbl_iris = pd.DataFrame(iris_df.species.value_counts()).T
tbl_iris_train = pd.DataFrame(iris_train.species.value_counts()).T
tbl_iris_test = pd.DataFrame(iris_test.species.value_counts()).T

# Stack the three rows. pd.concat replaces DataFrame.append, which was
# deprecated in pandas 1.4 and removed in pandas 2.0.
tbl = pd.concat([tbl_iris, tbl_iris_train, tbl_iris_test], ignore_index=True)
tbl.index = ["dataset", "training", "test"]

tbl

Unnamed: 0,setosa,versicolor,virginica
dataset,50,50,50
training,37,43,40
test,13,7,10


### Training models

#### Majority class baseline prediction (Zero Rule)

In [10]:
# Fit through the R function `class_ZeroRule`, passing the training frame and
# the name "species" as its extra argument. The result is bound to both
# `model` (reused by the following cells) and `ZeroRule_model`.
model = ZeroRule_model = applyRfunc(iris_train,r['class_ZeroRule'],par=["species"])

# Persist the fitted object to disk with pickle.
save_object(model, 'ZeroRule_model.pkl')

In [11]:
# Reload from disk. The first argument's value is not used by load_object
# here; `model` is rebound to whatever is unpickled from the file.
model = load_object(model, 'ZeroRule_model.pkl')

# Show the "metrics" entry nested under "train" in the returned R list.
lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.572222,0.358333,0.358333,0.679167,0.358333,0.358333


In [296]:
# Call the R function `class_test` with the model, the test frame and the
# name "species"; keep the returned R list in `test`.
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

# Show the "metrics" entry of the test result.
lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.511111,0.266667,0.266667,0.633333,0.266667,0.266667


In [297]:
# Print the "conf_mat" entry of the test result after conversion to Python.
print(lv2py(test,idx=["conf_mat"]))

[[ 9]
 [ 8]
 [13]]


#### Decision Tree
Train the model, then report its fit on the training data, its prediction quality on the test data, and the confusion matrix.

In [12]:
model = tree_model = applyRfunc(iris_train,r['class_tree'],par=["species"])

save_object(model, 'tree_model.pkl')

In [13]:
model = load_object(model, 'tree_model.pkl')

lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.977778,0.966667,0.966667,0.983333,0.966667,0.966667


In [14]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,1.0,1.0,1.0,1.0,1.0,1.0


In [279]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  0 13]]


#### Naive Bayes

In [14]:
model = nb_model = applyRfunc(iris_train,r['class_naiveBayes'],par=["species"])

save_object(model, 'nb_model.pkl')

In [15]:
model = load_object(model, 'nb_model.pkl')

lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.983333,0.975,0.975,0.9875,0.975,0.975


In [284]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.977778,0.966667,0.966667,0.983333,0.966667,0.966667


In [285]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  1 12]]


In [286]:
print(lv2py(nb_model,idx=["model"]))


Naive Bayes Classifier for Discrete Predictors

Call:
naiveBayes.default(x = X, y = Y, laplace = laplace)

A-priori probabilities:
Y
    setosa versicolor  virginica 
 0.3416667  0.3500000  0.3083333 

Conditional probabilities:
            sepal_length
Y                [,1]      [,2]
  setosa     5.012195 0.3280207
  versicolor 5.888095 0.4954438
  virginica  6.556757 0.6304939

            sepal_width
Y                [,1]      [,2]
  setosa     3.409756 0.3740353
  versicolor 2.769048 0.3204327
  virginica  2.943243 0.3201914

            petal_length
Y                [,1]      [,2]
  setosa     1.470732 0.1764141
  versicolor 4.235714 0.4937766
  virginica  5.548649 0.5520777

            petal_width
Y                 [,1]      [,2]
  setosa     0.2487805 0.1003044
  versicolor 1.3261905 0.2095984
  virginica  2.0324324 0.2858263




#### Random forest

In [21]:
model = rf_model = applyRfunc(iris_train,r['class_randomForest'],par=["species"])

save_object(model, 'rf_model.pkl')

In [22]:
model = load_object(model, 'rf_model.pkl')

lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,1.0,1.0,1.0,1.0,1.0,1.0


In [288]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.977778,0.966667,0.966667,0.983333,0.966667,0.966667


In [289]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  1 12]]


### Prepare dataset for Machine Learning

In [23]:
# Apply the R function `normalize.minmax` to the training frame. The result is
# read by name below, so it is a list-like R object with a "norm.set" entry.
# The test frame is then processed with the training "norm.set" as an extra
# argument.
# NOTE(review): the model cells that follow pass `iris_train` / `iris_test`,
# not `iris_train_n` / `iris_test_n` - confirm whether the normalized data was
# meant to be used.
iris_train_n = applyRfunc(iris_train,r['normalize.minmax'])
iris_test_n = applyRfunc(iris_test,r['normalize.minmax'],par=[lv2py(iris_train_n,idx=["norm.set"])])

#### Neural Networks - MLP using nnet

In [24]:
# Fit through the R function `class_mlp_nnet` on `iris_train`, with the name
# "species" as its extra argument.
# NOTE(review): this uses `iris_train`, not the `iris_train_n` produced by the
# normalization cell - confirm which input is intended.
model = mlp_nnet_model = applyRfunc(iris_train,r['class_mlp_nnet'],par=["species"])

# Round-trip the fitted object through a pickle file.
save_object(model, 'mlp_nnet_model.pkl')
model = load_object(model, 'mlp_nnet_model.pkl')

# Show the "metrics" entry nested under "train" in the returned R list.
lv2py(model,idx=["train","metrics"])

# weights:  27
initial  value 106.995912 
iter  10 value 64.447802
iter  20 value 43.632283
iter  30 value 42.635308
iter  40 value 20.789353
iter  50 value 7.820891
iter  60 value 6.918799
iter  70 value 6.644402
iter  80 value 6.249118
iter  90 value 6.167066
iter 100 value 6.164221
iter 110 value 6.164097
final  value 6.164096 
converged


Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.994444,0.991667,0.991667,0.995833,0.991667,0.991667


In [34]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.822222,0.733333,0.733333,0.866667,0.733333,0.733333


In [305]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  1 12]]


#### Neural Networks - MLP using RSNNS

In [25]:
model = mlp_rsnns_model = applyRfunc(iris_train,r['class_mlp_RSNNS'],par=["species"])

save_object(model, 'mlp_rsnns_model.pkl')
model = load_object(model, 'mlp_rsnns_model.pkl')

lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.994444,0.991667,0.991667,0.995833,0.991667,0.991667


In [307]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.955556,0.933333,0.933333,0.966667,0.933333,0.933333


In [308]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  2 11]]


#### Neural Networks - RBF using RSNNS

In [26]:
model = rbf_rsnns_model = applyRfunc(iris_train,r['class_rbf_RSNNS'],par=["species"])

save_object(model, 'rbf_rsnns_model.pkl')
model = load_object(model, 'rbf_rsnns_model.pkl')

lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.988889,0.983333,0.983333,0.991667,0.983333,0.983333


In [310]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.977778,0.966667,0.966667,0.983333,0.966667,0.966667


In [311]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  1 12]]


#### Creating a SVM with RBF kernel

In [28]:
model = svm_rbf_model = applyRfunc(iris_train,r['class_svm_rbf'],par=["species"])

save_object(model, 'svm_rbf_model.pkl')
model = load_object(model, 'svm_rbf_model.pkl')

lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.994444,0.991667,0.991667,0.995833,0.991667,0.991667


In [313]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.977778,0.966667,0.966667,0.983333,0.966667,0.966667


In [314]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  1 12]]


#### Creating a SVM with polynomial kernel

In [29]:
model = svm_poly_model = applyRfunc(iris_train,r['class_svm_poly'],par=["species"])

save_object(model, 'svm_poly_model.pkl')
model = load_object(model, 'svm_poly_model.pkl')

lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.994444,0.991667,0.991667,0.995833,0.991667,0.991667


In [316]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.977778,0.966667,0.966667,0.983333,0.966667,0.966667


In [314]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  1 12]]


#### Creating a SVM with sigmoid kernel

In [32]:
model = svm_sigmoid_model = applyRfunc(iris_train,r['class_svm_sigmoid'],par=["species"])

save_object(model, 'svm_sigmoid_model.pkl')
model = load_object(model, 'svm_sigmoid_model.pkl')

lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.822222,0.733333,0.733333,0.866667,0.733333,0.733333


In [33]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.822222,0.733333,0.733333,0.866667,0.733333,0.733333


In [319]:
print(lv2py(test,idx=["conf_mat"]))

[[9 0 0]
 [0 7 1]
 [0 4 9]]


#### knn prediction

In [31]:
# Fit through the R function `class_knn`; "species" and 3 are forwarded as
# extra positional arguments after the training frame.
# NOTE(review): 3 is presumably k, the number of neighbours - verify against
# the sourced R script.
model = knn_model = applyRfunc(iris_train,r['class_knn'],par=["species",3])

# Round-trip the fitted object through a pickle file.
save_object(model, 'knn_model.pkl')
model = load_object(model, 'knn_model.pkl')

# Show the "metrics" entry nested under "train" in the returned R list.
lv2py(model,idx=["train","metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.977778,0.966667,0.966667,0.983333,0.966667,0.966667


In [321]:
test = applyRfunc(model,r['class_test'],par=[iris_test,"species"])

lv2py(test,idx=["metrics"])

Unnamed: 0,accuracy,f1,sensitivity,specificity,precision,recall
1,0.977778,0.966667,0.966667,0.983333,0.966667,0.966667


In [322]:
print(lv2py(test,idx=["conf_mat"]))

[[ 9  0  0]
 [ 0  8  0]
 [ 0  1 12]]
