In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The paths below live under this mount point, so this must run first.
from google.colab import drive
drive.mount('/content/drive')

# Base directory of the project's data inside the mounted Drive.
# Later cells create this directory if needed and write the dataset CSV into it.
base_path = "/content/drive/MyDrive/Eixo_05/dados/"


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from datasets import load_dataset
import pandas as pd
import os

def _to_review_frame(split, label_map):
    """Convert one dataset split into a two-column (review, sentiment) DataFrame.

    Args:
        split: A split object exposing ``to_pandas()`` whose resulting frame
            has a ``text`` column and a ``label`` column.
        label_map: Dict mapping each ``label`` value to its sentiment string.

    Returns:
        A DataFrame with exactly the columns ``review`` and ``sentiment``,
        in that order.

    Raises:
        ValueError: If any ``label`` value has no entry in ``label_map``.
            ``Series.map`` would otherwise turn such labels into NaN and the
            rows would be written to the CSV with an empty sentiment.
    """
    frame = split.to_pandas()
    sentiment = frame["label"].map(label_map)

    # Fail loudly instead of persisting rows with a missing sentiment.
    unmapped = sentiment.isna()
    if unmapped.any():
        unknown = frame.loc[unmapped, "label"].unique().tolist()
        raise ValueError(f"Rótulos sem mapeamento em label_map: {unknown}")

    frame["sentiment"] = sentiment
    # Rename 'text' -> 'review' so the column names match the downstream pipeline.
    return frame.rename(columns={"text": "review"})[["review", "sentiment"]]


# Load the dataset splits.
ds = load_dataset("stanfordnlp/imdb")

# Integer label -> sentiment string.
label_map = {0: "negative", 1: "positive"}

# Convert to pandas (only the train and test splits are used).
df_train = _to_review_frame(ds["train"], label_map)
df_test = _to_review_frame(ds["test"], label_map)

# Stack both splits into a single frame with a fresh 0..n-1 index.
df_all = pd.concat([df_train, df_test], ignore_index=True)

# Create the data directory on Drive if it does not exist yet.
os.makedirs(base_path, exist_ok=True)

# Save the combined dataset as CSV on Drive (without the index column).
csv_path = os.path.join(base_path, "dataset.csv")
df_all.to_csv(csv_path, index=False)

print(df_all.head())
print(f"✅ Dataset salvo em: {csv_path}")
print(f"Total de linhas: {len(df_all)}")


                                              review sentiment
0  I rented I AM CURIOUS-YELLOW from my video sto...  negative
1  "I Am Curious: Yellow" is a risible and preten...  negative
2  If only to avoid making this type of film in t...  negative
3  This film was probably inspired by Godard's Ma...  negative
4  Oh, brother...after hearing about this ridicul...  negative
✅ Dataset salvo em: /content/drive/MyDrive/Eixo_05/dados/dataset.csv
Total de linhas: 50000
