In [1]:
%pip install pandas tensorflow numpy 

Note: you may need to restart the kernel to use updated packages.


In [2]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Load the wine dataset
wine = load_wine()
wine_data, wine_target = wine.data, wine.target # type: ignore

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(wine_data, wine_target, test_size=0.2, random_state=42)

In [3]:
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(142, 13) (36, 13) (142,) (36,)


In [4]:
# Normalizando os dados
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

In [5]:
import numpy as np

# Concatenate the datasets
data_train = np.concatenate((x_train, y_train.reshape(-1, 1)), axis=1)
data_test = np.concatenate((x_test, y_test.reshape(-1, 1)), axis=1)

In [6]:
print("data_train:")
print(data_train)

data_train:
[[0.87105263 0.16089613 0.71657754 ... 0.25274725 0.30102443 2.        ]
 [0.39473684 0.94093686 0.68449198 ... 0.15384615 0.18676123 2.        ]
 [0.35263158 0.03665988 0.39572193 ... 0.54945055 0.30102443 1.        ]
 ...
 [0.88157895 0.19959267 0.54545455 ... 0.63369963 1.         0.        ]
 [0.43684211 0.13034623 0.48128342 ... 0.28937729 0.17100079 1.        ]
 [0.34473684 0.31771894 0.58823529 ... 0.77289377 0.12608353 1.        ]]


In [7]:
print("data_test:")
print(data_test)

data_test:
[[0.7716263  0.61139896 0.74137931 0.02       0.84444444 0.69230769
  0.73760933 0.         0.60096154 0.4431555  0.52702703 0.81746032
  0.38376384 0.        ]
 [0.96885813 0.85492228 0.63793103 0.39       0.73333333 0.76923077
  0.62682216 0.2826087  0.40384615 0.45939681 0.40540541 0.80555556
  0.55719557 0.        ]
 [0.52595156 0.53626943 0.86206897 0.6        0.4        0.0974359
  0.         0.7826087  0.16346154 0.38515086 0.27027027 0.40079365
  0.20295203 2.        ]
 [0.80276817 0.19689119 0.86206897 0.75       0.51111111 0.84615385
  0.80174927 0.26086957 0.94711538 0.51276108 0.83783784 0.55952381
  0.70848708 0.        ]
 [0.33217993 0.11139896 0.18965517 0.46       0.         0.38974359
  0.43731778 0.2173913  0.30288462 0.3944316  0.74324324 0.86507937
  0.13653137 1.        ]
 [1.         0.30569948 0.87931034 0.5        0.93333333 0.74358974
  0.7696793  0.34782609 0.75       0.57076573 0.67567568 0.53571429
  0.70479705 0.        ]
 [0.20415225 0.69689119 

In [8]:
wine.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [9]:
import pandas as pd
qtd_features = len(wine.feature_names) # type: ignore
data_train = pd.DataFrame(
  data_train, 
  columns=[f"x_{i}" for i in range(qtd_features) ] + ['target'])
data_train

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10,x_11,x_12,target
0,0.871053,0.160896,0.716578,0.742268,0.304348,0.627586,0.204641,0.754717,0.721519,1.000000,0.073171,0.252747,0.301024,2.0
1,0.394737,0.940937,0.684492,0.742268,0.282609,0.279310,0.054852,0.943396,0.215190,0.289520,0.276423,0.153846,0.186761,2.0
2,0.352632,0.036660,0.395722,0.407216,0.195652,0.875862,0.719409,0.207547,0.484177,0.245115,0.455285,0.549451,0.301024,1.0
3,0.644737,0.158859,0.684492,0.613402,0.206522,0.558621,0.160338,0.735849,0.591772,0.888988,0.073171,0.186813,0.269504,2.0
4,0.536842,0.124236,0.395722,0.252577,0.304348,0.489655,0.485232,0.283019,0.300633,0.174067,0.569106,0.520147,0.584712,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137,0.744737,0.126273,0.700535,0.742268,0.173913,0.679310,0.531646,0.150943,0.458861,0.145648,0.715447,0.692308,0.104019,1.0
138,0.321053,0.171079,0.406417,0.432990,0.108696,0.231034,0.356540,0.452830,0.382911,0.147425,0.422764,0.695971,0.182821,1.0
139,0.881579,0.199593,0.545455,0.072165,0.347826,0.800000,0.696203,0.301887,0.803797,0.511545,0.585366,0.633700,1.000000,0.0
140,0.436842,0.130346,0.481283,0.520619,0.108696,0.137931,0.236287,0.849057,0.379747,0.116341,0.390244,0.289377,0.171001,1.0


In [10]:
import pandas as pd
qtd_features = len(wine.feature_names) # type: ignore
data_test = pd.DataFrame(
  data_test, 
  columns=[f"x_{i}" for i in range(qtd_features) ] + ['target'])
data_test

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10,x_11,x_12,target
0,0.771626,0.611399,0.741379,0.02,0.844444,0.692308,0.737609,0.0,0.600962,0.443156,0.527027,0.81746,0.383764,0.0
1,0.968858,0.854922,0.637931,0.39,0.733333,0.769231,0.626822,0.282609,0.403846,0.459397,0.405405,0.805556,0.557196,0.0
2,0.525952,0.536269,0.862069,0.6,0.4,0.097436,0.0,0.782609,0.163462,0.385151,0.27027,0.400794,0.202952,2.0
3,0.802768,0.196891,0.862069,0.75,0.511111,0.846154,0.801749,0.26087,0.947115,0.512761,0.837838,0.559524,0.708487,0.0
4,0.33218,0.111399,0.189655,0.46,0.0,0.389744,0.437318,0.217391,0.302885,0.394432,0.743243,0.865079,0.136531,1.0
5,1.0,0.305699,0.87931,0.5,0.933333,0.74359,0.769679,0.347826,0.75,0.570766,0.675676,0.535714,0.704797,0.0
6,0.204152,0.696891,0.258621,0.4,0.2,0.333333,0.332362,0.434783,0.701923,0.0,0.486486,0.694444,0.176384,1.0
7,0.688581,0.821244,0.672414,0.8,0.533333,0.230769,0.072886,0.565217,0.480769,0.698376,0.175676,0.103175,0.313653,2.0
8,0.069204,0.158031,0.862069,0.5,0.355556,0.712821,0.705539,0.26087,1.0,0.158933,0.527027,0.777778,0.261993,1.0
9,0.67474,0.471503,0.560345,0.5,0.244444,0.025641,0.0,0.434783,0.110577,0.50116,0.175676,0.464286,0.335793,2.0


In [11]:
data_train.to_csv('train.csv', index=False)
data_test.to_csv('test.csv', index=False)
data = pd.concat([data_train, data_test])
data.to_csv('data.csv', index=False)