#### 1. Importação das bibliotecas

In [None]:
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

#### 2. Importando os dados

In [None]:
# Load the headlines CSV; reset_index(drop=True) discards any pre-existing
# index labels so rows are numbered from 0.
headlines_df = (
    pd.read_csv('/content/drive/MyDrive/NLP/dados/news_headlines.csv')
    .reset_index(drop=True)
)

#### 3. Visualização dos dados

In [None]:
# Print a structural summary of the loaded DataFrame.
headlines_df.info()

In [None]:
# Preview the first rows of the dataset (shown as the cell output).
headlines_df.head()

In [None]:
# Preview the last rows of the dataset (shown as the cell output).
headlines_df.tail()

In [None]:
# Horizontal bar chart with the number of rows per value of the 'tags' column.
sns.countplot(
    data=headlines_df,
    y='tags',
    palette='pastel',
)
plt.show()

#### 4. Convertendo rótulos

In [None]:
# Encoder used to turn the textual tags into integer class ids (and back).
le = LabelEncoder()

In [None]:
# Learn the set of tags and encode the 'tags' column as the target vector Y.
Y = le.fit_transform(headlines_df['tags'])

In [None]:
# Show the tag names learned by the encoder (shown as the cell output).
le.classes_

#### 5. Separando dados de treino e teste

In [None]:
# Hold out 20% of the headlines for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    headlines_df['post'],
    Y,
    test_size=0.2,
    random_state=5,
)

#### 6. Definindo o Pipeline

In [None]:
# Two-stage text classifier: TF-IDF features followed by a linear SVM.
pipeline = Pipeline(
    steps=[
        ('vectorizer', TfidfVectorizer()),  # Features
        ('clf', LinearSVC()),  # Classifier
    ]
)

In [None]:
# Inspect the configured pipeline stages (shown as the cell output).
pipeline.steps

#### 7. Treinamento do modelo

In [None]:
# Fit the pipeline on the TRAINING split only. Fitting on x_test/y_test would
# leak the evaluation data into the model and make the metrics computed on
# x_test meaningless.
pipeline.fit(x_train, y_train)

#### 8. Avaliação do modelo

In [None]:
# Predict the encoded tag for every headline in the test split.
pred = pipeline.predict(x_test)

In [None]:
# Show the raw (encoded) predictions (shown as the cell output).
pred

In [None]:
accuracy_score(y_test, pred) # Overall accuracy on the test split

In [None]:
print(classification_report(y_test, pred)) # Per-class metrics report (rows are the encoded class ids)

In [None]:
# Show the tag names again so the encoded class ids can be mapped back to tags.
le.classes_

#### 9. Predição

In [None]:
# First sample headline to classify.
text = 'Sao Paulo Futebol Clube foi campeao paulista de 2021.'


In [None]:
# predict() is called with a one-element list, i.e. a batch of one document.
prediction = pipeline.predict([text])

In [None]:
# Decode the predicted class id back to its tag name (shown as the cell output).
le.inverse_transform(prediction)[0]

In [None]:
# Second sample headline to classify.
text = 'Doria desiste de candidatura a Presidencia em 2022.'

In [None]:
# predict() is called with a one-element list, i.e. a batch of one document.
prediction = pipeline.predict([text])

In [None]:
# Decode the predicted class id back to its tag name (shown as the cell output).
le.inverse_transform(prediction)[0]

#### 10. Exportando modelo para uso em produção

In [None]:
# Persist the fitted pipeline (vectorizer + classifier) to disk.
# NOTE(review): only the pipeline is saved here; the LabelEncoder `le` is not,
# so a consumer of model.joblib gets encoded class ids — confirm the tag
# mapping is made available to production some other way.
joblib.dump(pipeline, "model.joblib")