In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVC
import streamlit

In [3]:
iris = datasets.load_iris()
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [6]:
# iris is saved as a dictionary. 
# en data, cada lista es una fila
# the x-y split is already done, in "target"

In [7]:
# X-y split (but they are arrays, not dataframes)
X = iris["data"]
y = iris["target"]

In [8]:
iris["feature_names"]

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [10]:
X_df = pd.DataFrame(X, columns=["sepal_lenght", "sepal_width", "petal_length", "petal_width"])
X_df.head()

Unnamed: 0,sepal_lenght,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [11]:
len(X_df)

150

In [12]:
# añadir target, simplemente asignando columna
X_df["target"] = y
X_df.head()

Unnamed: 0,sepal_lenght,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [14]:
X_df.target.value_counts()

0    50
1    50
2    50
Name: target, dtype: int64

In [15]:
X_df.describe()

Unnamed: 0,sepal_lenght,sepal_width,petal_length,petal_width,target
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


In [17]:
# train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [18]:
# modeling

# asign each model to a variable
lin_reg = LinearRegression()
log_reg = LogisticRegression()
svc_ = SVC()

# fit the model
lin_reg_fit = lin_reg.fit(X_train, y_train)
log_reg_fit = log_reg.fit(X_train, y_train)
svc_fit = svc_.fit(X_train, y_train)

In [None]:
# como no vamos a evaluar el modelo con métricas etc, no sería necesario hacer train test split
# podríamos entrenar al modelo con el df entero

In [23]:
user_data = {"sepal_length":4.8, 
            "sepal_width":2.3, 
            "petal_length":3.3, 
            "petal_width":1.0}

features_df = pd.DataFrame(user_data, index=["user"]) # hay que especificar index para crar df, si no error
features_df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
user,4.8,2.3,3.3,1.0


In [27]:
# predictions

lin_reg_fit.predict(features_df)



array([0.93713189])

In [25]:
log_reg_fit.predict(features_df)



array([1])

In [26]:
svc_fit.predict(features_df)



array([1])

In [None]:
# the 3 models agree that user is 1 = versicolor

In [35]:
prediction = round(lin_reg_fit.predict(features_df)[0], 0) # redondeamos porque linear regression puede dar decimales
# lin_reg_fit.predict(features_df)[0] porque es un array



In [36]:
# for having output not with number but with name of flower

def classify(result):
    if result == 0: return " iris setosa"
    elif result == 1: return "versicolor"
    elif result == 2: return "virginica"

In [37]:
classify(prediction)

'versicolor'