# Logistic Regression example

In [2]:
// Pull smartcore from the `development` branch of its git repository,
// with the `datasets` feature enabled.
// This may take a while: the library has to be downloaded and installed first.
:dep smartcore = { git = "https://github.com/smartcorelib/smartcore", branch = "development", features = ["datasets"] }

In [3]:
use smartcore::dataset::iris::load_dataset as iris_load;
use smartcore::dataset::breast_cancer::load_dataset as bc_load;
use smartcore::dataset::diabetes::load_dataset as db_load;
use smartcore::linear::logistic_regression::LogisticRegression;
use smartcore::linalg::basic::matrix::DenseMatrix;
// Model performance
use smartcore::metrics::accuracy;

use smartcore::model_selection::{cross_val_predict, cross_validate, KFold};

## basic prediction and accuracy

In [4]:
// Fetch the Iris dataset
let iris_dataset = iris_load();

// Input data: wrap the flat data vector in a matrix with
// `num_samples` rows and `num_features` columns
let (n_rows, n_cols) = (iris_dataset.num_samples, iris_dataset.num_features);
let x: DenseMatrix<f32> =
    DenseMatrix::new(n_rows, n_cols, iris_dataset.data, false /* column_major */).unwrap();
// Target class labels
let y: Vec<u32> = iris_dataset.target;

// Fit Logistic Regression on the whole dataset, then predict
// class labels for the very same samples
let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap();
let y_hat = lr.predict(&x).unwrap();

// Accuracy measured on the training data itself
let training_accuracy = accuracy(&y, &y_hat);
println!("accuracy: {}", training_accuracy);

accuracy: 0.9866666666666667


## basic cross validation

In [5]:
// we need to import this as LogisticRegression is a SupervisedEstimator
// and we need its traits
use smartcore::api::SupervisedEstimator

// This example is expired by
// https://scikit-learn.org/stable/auto_examples/model_selection/plot_cv_predict.html
// Load dataset
let breast_cancer_data = bc_load();
let x: DenseMatrix<f32> = DenseMatrix::new(
    breast_cancer_data.num_samples,
    breast_cancer_data.num_features,
    breast_cancer_data.data,
    false
).unwrap();
// These are our target values
let y = breast_cancer_data.target;
// cross-validated estimator
let results = cross_validate(
    LogisticRegression::new(),
    &x,
    &y,
    Default::default(),
    &KFold::default().with_n_splits(3),
    &accuracy,
)
.unwrap();
println!(
    "Test score: {}, training score: {}",
    results.mean_test_score(),
    results.mean_train_score()
);


Test score: 0.947294161329249, training score: 0.9639795398787205


## basic cross validation with prediction

In [9]:
use smartcore::linear::linear_regression::LinearRegression;

// Diabetes dataset: wrap the flat data vector in a
// `num_samples` x `num_features` matrix
let diabetes_data = db_load();
let (n_rows, n_cols) = (diabetes_data.num_samples, diabetes_data.num_features);
let x = DenseMatrix::new(n_rows, n_cols, diabetes_data.data, false).unwrap();

// Target values
let y = diabetes_data.target;

// Cross-validated estimate for every input data point, using 10 splits
let folds = KFold::default().with_n_splits(10);
let y_hat = cross_val_predict(LinearRegression::new(), &x, &y, Default::default(), &folds)
    .unwrap();

println!("y hat {:?}", y_hat);


y hat [210, 67, 173, 166, 129, 109, 74, 121, 157, 213, 93, 94, 113, 163, 102, 173, 208, 185, 146, 127, 124, 87, 118, 250, 166, 146, 101, 177, 130, 177, 155, 68, 266, 118, 77, 83, 207, 156, 239, 133, 156, 74, 147, 79, 220, 129, 141, 106, 71, 191, 153, 167, 133, 157, 138, 70, 208, 81, 108, 137, 113, 181, 58, 96, 117, 186, 147, 123, 113, 117, 75, 232, 138, 126, 153, 128, 187, 72, 168, 88, 174, 125, 62, 150, 54, 165, 45, 153, 74, 108, 81, 184, 191, 60, 109, 129, 207, 210, 128, 143, 166, 108, 151, 154, 154, 116, 71, 162, 233, 148, 31, 121, 151, 204, 297, 191, 213, 228, 168, 151, 156, 203, 223, 175, 174, 185, 58, 117, 93, 206, 251, 67, 110, 66, 143, 239, 54, 232, 252, 257, 159, 226, 168, 118, 178, 241, 191, 236, 114, 179, 206, 144, 200, 126, 148, 197, 150, 122, 85, 233, 81, 230, 142, 195, 147, 78, 58, 264, 223, 220, 47, 85, 219, 95, 164, 120, 160, 223, 102, 161, 181, 89, 175, 158, 202, 186, 196, 62, 155, 117, 190, 125, 87, 138, 160, 171, 98, 191, 140, 176, 96, 69, 162, 199, 172, 232, 160, 21

In [10]:
:dep plotters = { version = "^0.3.0", default-features = false, features = ["evcxr", "all_series"] }

use plotters::prelude::*;

// Assemble the point sets for the scatter plot; each point is stored as [x, y]:
//   xy   -> [target, prediction]
//   diff -> [|target - prediction|, prediction]
// Borrowing iterators are used so that `y` and `y_hat` are neither copied
// nor consumed and stay available afterwards.
let xy: Vec<Vec<u32>> = y
    .iter()
    .zip(y_hat.iter())
    .map(|(&actual, &predicted)| vec![actual, predicted])
    .collect();

let diff: Vec<Vec<u32>> = y
    .iter()
    .zip(y_hat.iter())
    .map(|(&actual, &predicted)| vec![actual.abs_diff(predicted), predicted])
    .collect();

evcxr_figure((640, 480), |root| {
    // Chart context with fixed axis ranges: x in 0..350, y in 0..400
    let mut chart = ChartBuilder::on(&root)
        .caption("Y and Y_hat (green) and their difference (blue)", ("Arial", 20).into_font())
        .x_label_area_size(40)
        .y_label_area_size(40)
        .build_cartesian_2d(0u32..350u32, 0u32..400u32)?;

    chart.configure_mesh()
        .x_desc("Y")
        .y_desc("Y_hat")
        .draw()?;

    // Green: target vs. prediction
    chart.draw_series(xy.iter().map(|p| Circle::new((p[0], p[1]), 3, GREEN.filled())))?;

    // Blue: absolute error vs. prediction
    chart.draw_series(diff.iter().map(|p| Circle::new((p[0], p[1]), 3, BLUE.filled())))?;

    Ok(())
}).style("width: 60%")