In [1]:
# library untuk manipulasi data
import pandas as pd
import numpy as np

# library untuk visualisasi data
import matplotlib.pyplot as plt
import seaborn as sns

# library statistic
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import statsmodels.formula.api as smf

## 1. Akuisisi Data

In [2]:
# load dataset
dataset = pd.read_csv("dataset/ekstrasi-fitur-kopra-nonTelungkup.csv")

In [3]:
np.round(dataset, 2)

Unnamed: 0,Mean-R,Mean-G,Mean-B,Mean-H,Mean-V,Mean-S,Mean-Gray,Standar-Deviasi,Luas,Perimeter,Contrast,Dissimilarity,Homogeneity,Energy,Correlation,Jenis Kopra
0,99.77,85.30,72.58,11.27,99.79,51.02,88.19,54.59,7933.5,2053.30,43.45,3.34,0.48,0.23,0.99,1
1,114.16,110.42,91.15,19.00,114.83,36.87,109.36,79.94,33489.0,1334.85,68.71,4.14,0.47,0.28,0.99,1
2,86.76,77.10,58.22,14.37,86.76,58.79,77.83,52.09,1958.5,573.85,37.74,2.92,0.52,0.24,0.99,1
3,80.40,71.18,55.91,12.92,80.40,52.58,72.24,52.31,2908.0,862.97,34.47,2.61,0.56,0.26,0.99,1
4,98.96,89.05,68.02,14.36,98.97,56.02,89.63,63.76,31867.5,706.14,51.52,3.06,0.54,0.26,0.99,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1834,65.70,53.85,38.52,13.25,65.72,78.25,55.65,38.13,274.5,94.81,55.97,3.63,0.45,0.21,0.98,3
1835,56.11,52.09,39.02,20.33,57.66,63.26,51.81,35.67,513.0,153.82,87.99,5.20,0.37,0.21,0.97,3
1836,57.39,46.50,29.44,12.42,57.39,83.30,47.83,37.25,150.0,172.85,76.82,4.53,0.43,0.27,0.97,3
1837,66.60,58.95,41.09,14.98,66.61,68.81,59.21,41.05,342.0,171.05,68.50,4.40,0.43,0.24,0.98,3


In [4]:
X = dataset.iloc[:,:-1]
Y = dataset.iloc[:,-1:]

In [5]:
scaler = MinMaxScaler()
df_norm = scaler.fit_transform(X)

In [6]:
df_korelasi = pd.concat([pd.DataFrame(df_norm), Y], axis=1)
np.round(df_korelasi, 3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,Jenis Kopra
0,0.664,0.641,0.677,0.084,0.664,0.291,0.652,0.485,0.046,0.339,0.070,0.137,0.508,0.148,0.951,1
1,0.802,0.887,0.898,0.163,0.808,0.163,0.862,0.845,0.195,0.220,0.152,0.229,0.484,0.274,0.977,1
2,0.539,0.561,0.506,0.115,0.539,0.362,0.549,0.449,0.011,0.095,0.051,0.088,0.625,0.167,0.956,1
3,0.478,0.503,0.478,0.100,0.478,0.305,0.494,0.452,0.017,0.143,0.041,0.053,0.749,0.242,0.965,1
4,0.656,0.678,0.622,0.115,0.656,0.337,0.666,0.615,0.185,0.117,0.096,0.105,0.679,0.225,0.964,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1834,0.337,0.334,0.271,0.104,0.337,0.539,0.329,0.251,0.002,0.016,0.110,0.171,0.407,0.097,0.794,3
1835,0.245,0.317,0.277,0.177,0.260,0.403,0.291,0.216,0.003,0.025,0.214,0.350,0.168,0.089,0.592,3
1836,0.257,0.262,0.163,0.095,0.257,0.585,0.252,0.238,0.001,0.029,0.178,0.273,0.356,0.257,0.683,3
1837,0.346,0.384,0.302,0.122,0.346,0.453,0.364,0.292,0.002,0.028,0.151,0.259,0.353,0.188,0.780,3


## 2. Uji korelasi Pearson, Spearman Rank, Kendall

In [7]:
pearson = df_korelasi.corr(method="pearson")

In [8]:
print("Hasil uji korelasi Pearson")
np.round(pearson,1).tail(1)

Hasil uji korelasi Pearson


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,Jenis Kopra
Jenis Kopra,-0.5,-0.5,-0.6,-0.1,-0.5,0.4,-0.5,-0.3,-0.1,-0.0,0.5,0.6,-0.5,0.4,-0.6,1.0


In [9]:
kendall = df_korelasi.corr(method="kendall")

In [10]:
print("Hasil uji korelasi Kendall-Tau")
np.round(kendall,1).tail(1)

Hasil uji korelasi Kendall-Tau


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,Jenis Kopra
Jenis Kopra,-0.4,-0.4,-0.5,-0.1,-0.4,0.3,-0.4,-0.3,-0.2,-0.1,0.5,0.6,-0.4,0.2,-0.6,1.0


In [11]:
spearman = df_korelasi.corr(method="spearman")

In [12]:
print("Hasil uji korelasi Spearman-Rank")
np.round(spearman,1).tail(1)

Hasil uji korelasi Spearman-Rank


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,Jenis Kopra
Jenis Kopra,-0.5,-0.5,-0.6,-0.1,-0.5,0.3,-0.5,-0.3,-0.3,-0.2,0.6,0.7,-0.6,0.3,-0.8,1.0


## 3. Analisa multivariate

In [13]:
import math

In [14]:
# load dataset
dataset = pd.read_csv("dataset/ekstrasi-fitur-kopra-nonTelungkup_v2.csv")

In [15]:
np.round(dataset, 2)

Unnamed: 0,Mean_R,Mean_G,Mean_B,Mean_H,Mean_V,Mean_S,Mean_Gray,Standar_Deviasi,Luas,Perimeter,Contrast,Dissimilarity,Homogeneity,Energy,Correlation,Jenis_Kopra
0,99.77,85.30,72.58,11.27,99.79,51.02,88.19,54.59,7933.5,2053.30,43.45,3.34,0.48,0.23,0.99,1
1,114.16,110.42,91.15,19.00,114.83,36.87,109.36,79.94,33489.0,1334.85,68.71,4.14,0.47,0.28,0.99,1
2,86.76,77.10,58.22,14.37,86.76,58.79,77.83,52.09,1958.5,573.85,37.74,2.92,0.52,0.24,0.99,1
3,80.40,71.18,55.91,12.92,80.40,52.58,72.24,52.31,2908.0,862.97,34.47,2.61,0.56,0.26,0.99,1
4,98.96,89.05,68.02,14.36,98.97,56.02,89.63,63.76,31867.5,706.14,51.52,3.06,0.54,0.26,0.99,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1834,65.70,53.85,38.52,13.25,65.72,78.25,55.65,38.13,274.5,94.81,55.97,3.63,0.45,0.21,0.98,3
1835,56.11,52.09,39.02,20.33,57.66,63.26,51.81,35.67,513.0,153.82,87.99,5.20,0.37,0.21,0.97,3
1836,57.39,46.50,29.44,12.42,57.39,83.30,47.83,37.25,150.0,172.85,76.82,4.53,0.43,0.27,0.97,3
1837,66.60,58.95,41.09,14.98,66.61,68.81,59.21,41.05,342.0,171.05,68.50,4.40,0.43,0.24,0.98,3


In [16]:
# Exp 1 = Warna
# Mean_R,Mean_G,Mean_B,Mean_H,Mean_V,Mean_S,Mean_Gray,Standar_Deviasi,Jenis_Kopra

# Exp 2 = Bentuk
# Luas,Perimeter,Jenis_Kopra

# Exp 3 = Tekstur
# Contrast,Dissimilarity,Homogeneity,Energy,Correlation,Jenis_Kopra

# Exp 4 = Warna + Bentuk
# Mean_R,Mean_G,Mean_B,Mean_H,Mean_V,Mean_S,Mean_Gray,Standar_Deviasi,Luas,Perimeter,Jenis_Kopra

# Exp 5 = Warna + Tekstur
# Mean_R,Mean_G,Mean_B,Mean_H,Mean_V,Mean_S,Mean_Gray,Standar_Deviasi,Contrast,Dissimilarity,Homogeneity,Energy,Correlation,Jenis_Kopra

# Exp 6 = Bentuk + Tekstur
# Luas,Perimeter,Contrast,Dissimilarity,Homogeneity,Energy,Correlation,Jenis_Kopra

# Exp 7 = Warna + Bentuk + Tekstur
# Mean_R,Mean_G,Mean_B,Mean_H,Mean_V,Mean_S,Mean_Gray,Standar_Deviasi,Luas,Perimeter,Contrast,Dissimilarity,Homogeneity,Energy,Correlation,Jenis_Kopra

In [17]:
# Membuat model regresi linier sederhana
model_lin = sm.OLS.from_formula("Jenis_Kopra ~ Mean_R + Mean_G + Mean_B + Mean_H + Mean_V + Mean_S + Mean_Gray + Standar_Deviasi", data=dataset)
result_lin = model_lin.fit()

# Menampilkan hasil regresi
print(result_lin.summary())

print("\n R - Exp 1 = " + str(np.round(math.sqrt(0.526), 2)))

                            OLS Regression Results                            
Dep. Variable:            Jenis_Kopra   R-squared:                       0.526
Model:                            OLS   Adj. R-squared:                  0.524
Method:                 Least Squares   F-statistic:                     254.0
Date:                Sat, 18 May 2024   Prob (F-statistic):          2.85e-290
Time:                        03:48:36   Log-Likelihood:                -1549.8
No. Observations:                1839   AIC:                             3118.
Df Residuals:                    1830   BIC:                             3167.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           0.7839      0.185     

In [18]:
# Membuat model regresi linier sederhana
model_lin = sm.OLS.from_formula("Jenis_Kopra ~ Luas + Perimeter", data=dataset)
result_lin = model_lin.fit()

# Menampilkan hasil regresi
print(result_lin.summary())

print("\n R - Exp 2 = " + str(np.round(math.sqrt(0.022), 2)))

                            OLS Regression Results                            
Dep. Variable:            Jenis_Kopra   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.021
Method:                 Least Squares   F-statistic:                     20.96
Date:                Sat, 18 May 2024   Prob (F-statistic):           9.95e-10
Time:                        03:48:36   Log-Likelihood:                -2215.8
No. Observations:                1839   AIC:                             4438.
Df Residuals:                    1836   BIC:                             4454.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      2.0311      0.027     74.309      0.0

In [19]:
# Membuat model regresi linier sederhana
model_lin = sm.OLS.from_formula("Jenis_Kopra ~ Contrast+Dissimilarity+Homogeneity+Energy+Correlation", data=dataset)
result_lin = model_lin.fit()

# Menampilkan hasil regresi
print(result_lin.summary())

print("\n R - Exp 3 = " + str(np.round(math.sqrt(0.654), 2)))


                            OLS Regression Results                            
Dep. Variable:            Jenis_Kopra   R-squared:                       0.654
Model:                            OLS   Adj. R-squared:                  0.653
Method:                 Least Squares   F-statistic:                     693.6
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        03:48:36   Log-Likelihood:                -1260.1
No. Observations:                1839   AIC:                             2532.
Df Residuals:                    1833   BIC:                             2565.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept        32.0612      2.254     14.225

In [20]:
# Membuat model regresi linier sederhana
model_lin = sm.OLS.from_formula("Jenis_Kopra ~ Mean_R+Mean_G+Mean_B+Mean_H+Mean_V+Mean_S+Mean_Gray+Standar_Deviasi+Luas+Perimeter", data=dataset)
result_lin = model_lin.fit()

# Menampilkan hasil regresi
print(result_lin.summary())

print("\n R - Exp 4 = " + str(np.round(math.sqrt(0.557), 2)))


                            OLS Regression Results                            
Dep. Variable:            Jenis_Kopra   R-squared:                       0.557
Model:                            OLS   Adj. R-squared:                  0.554
Method:                 Least Squares   F-statistic:                     229.7
Date:                Sat, 18 May 2024   Prob (F-statistic):          2.68e-314
Time:                        03:48:36   Log-Likelihood:                -1488.3
No. Observations:                1839   AIC:                             2999.
Df Residuals:                    1828   BIC:                             3059.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           1.4403      0.188     

In [21]:
# Membuat model regresi linier sederhana
model_lin = sm.OLS.from_formula("Jenis_Kopra ~ Mean_R+Mean_G+Mean_B+Mean_H+Mean_V+Mean_S+Mean_Gray+Standar_Deviasi+Contrast+Dissimilarity+Homogeneity+Energy+Correlation", data=dataset)
result_lin = model_lin.fit()

# Menampilkan hasil regresi
print(result_lin.summary())

print("\n R - Exp 5 = " + str(np.round(math.sqrt(0.730), 2)))


                            OLS Regression Results                            
Dep. Variable:            Jenis_Kopra   R-squared:                       0.730
Model:                            OLS   Adj. R-squared:                  0.728
Method:                 Least Squares   F-statistic:                     380.4
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        03:48:36   Log-Likelihood:                -1031.3
No. Observations:                1839   AIC:                             2091.
Df Residuals:                    1825   BIC:                             2168.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           0.1032      4.069     

In [22]:
# Membuat model regresi linier sederhana
model_lin = sm.OLS.from_formula("Jenis_Kopra ~ Luas+Perimeter+Contrast+Dissimilarity+Homogeneity+Energy+Correlation", data=dataset)
result_lin = model_lin.fit()

# Menampilkan hasil regresi
print(result_lin.summary())

print("\n R - Exp 6 = " + str(np.round(math.sqrt(0.655), 2)))


                            OLS Regression Results                            
Dep. Variable:            Jenis_Kopra   R-squared:                       0.655
Model:                            OLS   Adj. R-squared:                  0.654
Method:                 Least Squares   F-statistic:                     496.5
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        03:48:36   Log-Likelihood:                -1258.2
No. Observations:                1839   AIC:                             2532.
Df Residuals:                    1831   BIC:                             2577.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept        28.8768      2.782     10.381

In [23]:
# Membuat model regresi linier sederhana
model_lin = sm.OLS.from_formula("Jenis_Kopra ~ Mean_R+Mean_G+Mean_B+Mean_H+Mean_V+Mean_S+Mean_Gray+Standar_Deviasi+Luas+Perimeter+Contrast+Dissimilarity+Homogeneity+Energy+Correlation", data=dataset)
result_lin = model_lin.fit()

# Menampilkan hasil regresi
print(result_lin.summary())

print("\n R - Exp 7 = " + str(np.round(math.sqrt(0.742), 2)))


                            OLS Regression Results                            
Dep. Variable:            Jenis_Kopra   R-squared:                       0.742
Model:                            OLS   Adj. R-squared:                  0.740
Method:                 Least Squares   F-statistic:                     349.7
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        03:48:36   Log-Likelihood:                -990.43
No. Observations:                1839   AIC:                             2013.
Df Residuals:                    1823   BIC:                             2101.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept           0.5937      4.192     