# Oranges vs. Grapefruit

Separating oranges from grapefruit is fairly easy for a human, but even manual inspection leaves some room for error. This dataset starts from the color, weight, and diameter of an "average" orange and an "average" grapefruit and generates a larger dataset containing a wide variety of values, each labeled as either "orange" or "grapefruit".

In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Read the citrus measurements from the Kaggle input directory and preview
# the first rows.
csv_path = "../input/oranges-vs-grapefruit/citrus.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Print a summary of the frame: column names, dtypes, non-null counts and
# memory usage.
data.info()

In [None]:
# Display descriptive statistics (count, mean, std, min, quartiles, max) for
# the numeric columns.
data.describe()

In [None]:
# Inspect the target column: which class labels exist and how many rows
# belong to each.
label_column = data["name"]
print(label_column.unique())
print(" ")
print(label_column.value_counts())

In [None]:
# Encode the class labels as integers (orange -> 0, grapefruit -> 1).
# DataFrame.replace returns a new frame rather than modifying `data`, so the
# result has to be assigned back for the encoding to persist. The trailing
# bare `data` keeps the cell displaying the encoded table as before.
data = data.replace({'orange': 0, 'grapefruit': 1})
data

# Preprocessing


In [None]:
def preprocessing(df, train_size=0.7, random_state=123):
    """Split *df* into standardized train/test features and their labels.

    The ``"name"`` column is used as the target; every other column is
    treated as a feature.

    Args:
        df: DataFrame containing a ``"name"`` column plus feature columns.
            The caller's frame is not modified.
        train_size: Fraction of rows assigned to the training split.
        random_state: Seed for the shuffle, so the split is reproducible.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` where the ``X`` parts are
        standardized arrays and the ``y`` parts are the matching labels.
    """
    df = df.copy()

    # Separate the target column from the feature columns.
    y = df["name"].copy()
    X = df.drop("name", axis=1).copy()

    # Shuffle before splitting. With shuffling disabled the seed has no
    # effect and the split simply takes the leading rows in file order, which
    # skews the class balance of both splits if the file is grouped by label.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, shuffle=True, random_state=random_state
    )

    # Fit the scaler on the training rows only and reuse its statistics for
    # the test rows, so no information from the test split leaks into
    # training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

In [None]:
# Build the train/test feature arrays and label series from the loaded frame.
X_train,X_test,y_train,y_test=preprocessing(data)

In [None]:
# Candidate classifiers, all with default hyper-parameters. The keys are
# padded to a common width so the printed scores line up in a column.
models = {
    "LogisticRegression          ": LogisticRegression(),
    "RandomForestClassifier      ": RandomForestClassifier(),
    "GradientBoostingClassifier  ": GradientBoostingClassifier(),
    "DecisionTreeClassifier      ": DecisionTreeClassifier(),
}

# Fit every model on the training split.
for model in models.values():
    model.fit(X_train, y_train)

# Report accuracy on the held-out test split. Scoring on the rows the models
# were fitted on only measures how well they memorised the training data.
for name, model in models.items():
    print(name + ":{:.2f}%".format(model.score(X_test, y_test) * 100))