In [None]:
%%R
# Credit-default analysis: load credit.csv, explore it, train a C5.0 decision
# tree on a 90/10 split and evaluate it on the held-out rows.
#
# NOTE: values meant to be displayed are wrapped in print(). Inside a %%R cell
# only the value of the last expression appears to be auto-printed, so bare
# expressions such as head(...) or table(...) would otherwise show nothing.

# Install the modelling/reporting packages only when they are missing, so that
# re-running the cell does not download and rebuild them every time.
for (pkg in c("C50", "gmodels")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(C50)
library(gmodels)

# Load the credit data set.
credit_data <- read.csv("credit.csv")

# Fail early when the 'default' column is missing; otherwise expose it as the
# factor 'credit_risk', which is used as the classification target below.
if (!"default" %in% colnames(credit_data)) {
  stop("La colonne 'default' n'existe pas dans les données.")
}
credit_data$credit_risk <- as.factor(credit_data$default)

# First rows, as a quick sanity check.
print(head(credit_data))

# Structure of the data (str() prints by itself).
str(credit_data)

# Counts per checking_balance bracket.
print(table(credit_data$checking_balance))

# Counts per savings_balance bracket.
print(table(credit_data$savings_balance))

# Summary statistics of the loan duration.
print(summary(credit_data$months_loan_duration))

# Absolute and relative frequencies of each credit_risk class.
risk_counts <- table(credit_data$credit_risk)
print(risk_counts)
print(prop.table(risk_counts))

# 90% training / 10% test split; the seed makes the split reproducible.
set.seed(123)
n_rows <- nrow(credit_data)
# seq_len() is safe even for an empty data frame (1:0 would yield c(1, 0)).
train_index <- sample(seq_len(n_rows), floor(0.9 * n_rows))
train_data <- credit_data[train_index, ]
test_data <- credit_data[-train_index, ]

# Class proportions in the training set.
print(prop.table(table(train_data$credit_risk)))

# Class proportions in the test set.
print(prop.table(table(test_data$credit_risk)))

# 'credit_risk' is a copy of 'default', so 'default' must not be offered to the
# model as a predictor: with `credit_risk ~ .` it would leak the label and make
# the tree (and its accuracy) trivially perfect.
model_columns <- setdiff(colnames(train_data), "default")

# Train the C5.0 model on every remaining column.
credit_model <- C5.0(credit_risk ~ ., data = train_data[, model_columns])

# Show the fitted tree and its training diagnostics.
print(summary(credit_model))

# Predict the held-out rows (same column set as used for training).
predictions <- predict(credit_model, test_data[, model_columns])

# Confusion matrix: predicted class (rows) vs. actual class (columns).
print(table(predictions, test_data$credit_risk))

# Accuracy = share of test rows whose prediction matches the actual class.
accuracy <- mean(predictions == test_data$credit_risk)
print(paste("Accuracy: ", accuracy))

# Detailed cross-tabulation of predictions vs. actual classes.
CrossTable(predictions, test_data$credit_risk)



'data.frame':	1000 obs. of  22 variables:
 $ checking_balance    : chr  "< 0 DM" "1 - 200 DM" "unknown" "< 0 DM" ...
 $ months_loan_duration: int  6 48 12 42 24 36 24 36 12 30 ...
 $ credit_history      : chr  "critical" "repaid" "critical" "repaid" ...
 $ purpose             : chr  "radio/tv" "radio/tv" "education" "furniture" ...
 $ amount              : int  1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
 $ savings_balance     : chr  "unknown" "< 100 DM" "< 100 DM" "< 100 DM" ...
 $ employment_length   : chr  "> 7 yrs" "1 - 4 yrs" "4 - 7 yrs" "4 - 7 yrs" ...
 $ installment_rate    : int  4 2 2 2 3 2 3 2 2 4 ...
 $ personal_status     : chr  "single male" "female" "single male" "single male" ...
 $ other_debtors       : chr  "none" "none" "none" "guarantor" ...
 $ residence_history   : int  4 2 3 4 4 4 4 2 4 2 ...
 $ property            : chr  "real estate" "real estate" "real estate" "building society savings" ...
 $ age                 : int  67 22 49 45 53 35 53 35 61 28 ..

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
also installing the dependencies ‘plyr’, ‘reshape2’, ‘libcoin’, ‘mvtnorm’, ‘Formula’, ‘inum’, ‘Cubist’, ‘partykit’

trying URL 'https://cran.rstudio.com/src/contrib/plyr_1.8.9.tar.gz'
Content type 'application/x-gzip' length 401488 bytes (392 KB)
downloaded 392 KB

trying URL 'https://cran.rstudio.com/src/contrib/reshape2_1.4.4.tar.gz'
Content type 'application/x-gzip' length 37307 bytes (36 KB)
downloaded 36 KB

trying URL 'https://cran.rstudio.com/src/contrib/libcoin_1.0-10.tar.gz'
Content type 'application/x-gzip' length 847638 bytes (827 KB)
downloaded 827 KB

trying URL 'https://cran.rstudio.com/src/contrib/mvtnorm_1.3-3.tar.gz'
Content type 'application/x-gzip' length 837946 bytes (818 KB)
downloaded 818 KB

trying URL 'https://cran.rstudio.com/src/contrib/Formula_1.2-5.tar.gz'
Content type 'application/x-gzip' length 128259 bytes (125 KB)
downloaded 125 KB

trying URL 'https://cran.rstudio.com/src/

%%R  
# Ce script analyse un dataset de crédit, transforme la variable cible et explore les données.  
# Un modèle C5.0 est entraîné et évalué sur un ensemble de test pour mesurer son exactitude (accuracy).
# La matrice de confusion et CrossTable sont utilisés pour interpréter les résultats.  


In [None]:
%%R
# List the column names of the credit data frame.
names(credit_data)


 [1] "checking_balance"     "months_loan_duration" "credit_history"      
 [4] "purpose"              "amount"               "savings_balance"     
 [7] "employment_length"    "installment_rate"     "personal_status"     
[10] "other_debtors"        "residence_history"    "property"            
[13] "age"                  "installment_plan"     "housing"             
[16] "existing_credits"     "job"                  "dependents"          
[19] "telephone"            "foreign_worker"       "default"             


In [None]:
# Load the rpy2 IPython extension, which provides the %%R cell magic used by
# the R cells of this notebook (it has to run before any %%R cell).
%load_ext rpy2.ipython

In [None]:
%%R
# Install rpart and rpart.plot only when they are missing, so that re-running
# this cell does not download and rebuild them from source every time.
for (pkg in c("rpart", "rpart.plot")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/src/contrib/rpart_4.1.24.tar.gz'
Content type 'application/x-gzip' length 620065 bytes (605 KB)
downloaded 605 KB


The downloaded source packages are in
	‘/tmp/RtmpJm3xxy/downloaded_packages’
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/src/contrib/rpart.plot_3.1.2.tar.gz'
Content type 'application/x-gzip' length 673083 bytes (657 KB)
downloaded 657 KB


The downloaded source packages are in
	‘/tmp/RtmpJm3xxy/downloaded_packages’
