In [1]:
import torch
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import get_linear_schedule_with_warmup

import pandas as pd
import numpy as np
import random
import time
import datetime
import os

from utils import (
    tokenizer_setting,
    preprocessing,
    GPU_setting,
    hyperparmeter_setting,
    flat_accuracy,
    format_time,
    initial_setting,
    run_train,
    run_test,
    convert_input_data,
    test_sentence_unit,
    test_sentence_many
)

### 1. Initial Setting
- Load the data
- Split train, test data
- Load tokenizer

In [2]:
path = './../../data/rotten_tomato/emotion_analysis_data/'
os.listdir(path)

['checkpoint.pth.tar',
 'model_new.pth',
 'model_save',
 'test.txt',
 'train.txt',
 'val.txt']

In [3]:
header = ['default']
train_df = pd.read_csv(os.path.join(path + 'train.txt'), names=header, encoding='utf-8')
test_df = pd.read_csv(os.path.join(path + 'test.txt'), names=header, encoding='utf-8')
val_df = pd.read_csv(os.path.join(path + 'val.txt'), names=header, encoding='utf-8')

In [4]:
train_df.head()

Unnamed: 0,default
0,i didnt feel humiliated;sadness
1,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wro...
3,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy;anger


In [5]:
df_list = [train_df, test_df, val_df]

for df in df_list:
    df['content'] = df.default.str.split(';').str[0]
    df['emotion'] = df.default.str.split(';').str[1]
    df.drop('default', axis=1, inplace=True)

In [6]:
train_df.head()

Unnamed: 0,content,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [7]:
set(train_df['emotion'])

{'anger', 'fear', 'joy', 'love', 'sadness', 'surprise'}

In [8]:
change_value_dict = {'anger':0, 'fear':1, 'joy':2, 'love':3, 'sadness':4, 'surprise':5}

In [9]:
df_list = [train_df, test_df, val_df]

for df in df_list:
    df = df.replace({'emotion': change_value_dict}, inplace=True)

In [10]:
val_df.head()

Unnamed: 0,content,emotion
0,im feeling quite sad and sorry for myself but ...,4
1,i feel like i am still looking at a blank canv...,4
2,i feel like a faithful servant,3
3,i am just feeling cranky and blue,0
4,i can have for a treat or if i am feeling festive,2


In [11]:
print(len(train_df))
print(len(test_df))
print(len(val_df))

16000
2000
2000


In [12]:
X_train = train_df['content']
y_train = train_df['emotion']

X_test = test_df['content']
y_test = test_df['emotion']

X_val = val_df['content']
y_val = val_df['emotion']

In [13]:
tokenizer = tokenizer_setting()

---
### 2. Pre-processing

In [14]:
train_dataloader = preprocessing(X_train, y_train, tokenizer, process_type='test')

In [15]:
test_dataloader = preprocessing(X_test, y_test, tokenizer, process_type='test')

In [16]:
validation_dataloader = preprocessing(X_val, y_val, tokenizer, process_type='test')

---
### 3. Pre-training

In [71]:
start_time = time.time()
model = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels = 6) #label개수에 따라 변경
model.cuda()
print("  Loading took: {:}".format(format_time(time.time() - start_time)))

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

  Loading took: 0:34:15


In [18]:
device = GPU_setting()

There are 1 GPU(s) available.
Device 0 : NVIDIA GeForce RTX 3080


In [18]:
optimizer, epochs, total_steps, scheduler = hyperparmeter_setting(model, train_dataloader, lr=2e-5, eps=1e-8, epochs=3)

NameError: name 'model' is not defined

In [74]:
model = initial_setting(model, seed_val=42)

---
### 4. Fine-tuning

In [78]:
model = run_train(model, epochs, train_dataloader, validation_dataloader, optimizer, scheduler, device, path)


Training...

  Average training loss: 0.76
  Training epoch took: 0:09:59
  Model Checkpoint Save
file_path: ./../../data/rotten_tomato/emotion_analysis_data/checkpoint.pth.tar

Running Validation...
  Accuracy: 0.92
  Validation took: 0:00:20

Training...

  Average training loss: 0.21
  Training epoch took: 0:08:26
  Model Checkpoint Save
file_path: ./../../data/rotten_tomato/emotion_analysis_data/checkpoint.pth.tar

Running Validation...
  Accuracy: 0.93
  Validation took: 0:00:20

Training...

  Average training loss: 0.13
  Training epoch took: 0:08:25
  Model Checkpoint Save
file_path: ./../../data/rotten_tomato/emotion_analysis_data/checkpoint.pth.tar

Running Validation...
  Accuracy: 0.93
  Validation took: 0:00:20

Total took: 0:28:06
Training complete!


### 5. Save the model

In [79]:
torch.save(model.state_dict(), path+"model_new.pth")

In [19]:
model.load_state_dict(torch.load(path+"model_new.pth"))
model.eval()

NameError: name 'model' is not defined

In [81]:
import os

# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()

output_dir = path + 'model_save/'

# Create output directory if needed
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

print("Saving model to %s" % output_dir)

# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

Saving model to ./../../data/rotten_tomato/emotion_analysis_data/model_save/


('./../../data/rotten_tomato/emotion_analysis_data/model_save/tokenizer_config.json',
 './../../data/rotten_tomato/emotion_analysis_data/model_save/special_tokens_map.json',
 './../../data/rotten_tomato/emotion_analysis_data/model_save/vocab.txt',
 './../../data/rotten_tomato/emotion_analysis_data/model_save/added_tokens.json')

- Load the model

In [19]:
output_dir = path + 'model_save/'

In [20]:
output_dir

'./../../data/rotten_tomato/emotion_analysis_data/model_save/'

In [21]:
# Load a trained model and vocabulary that you have fine-tuned
start_time = time.time()
model_new = BertForSequenceClassification.from_pretrained(output_dir)
tokenizer = BertTokenizer.from_pretrained(output_dir)

# Copy the model to the GPU.
model_new.to(device)
print("  Loading took: {:}".format(format_time(time.time() - start_time)))

  Loading took: 0:29:43


In [22]:
model = model_new

---
### 6. Testing

In [83]:
run_test(model, test_dataloader, device)


Accuracy: 0.93
Test took: 0:00:21


- 단일 문장

In [27]:
change_value_dict

{'anger': 0, 'fear': 1, 'joy': 2, 'love': 3, 'sadness': 4, 'surprise': 5}

In [23]:
sentence = "Whether audiences will get behind The Lightning Thief is hard to predict. Overall, it's an entertaining introduction to a promising new world -- but will the consuming shadow of Potter be too big to break free of?"
logits = test_sentence_unit(model, device, tokenizer, [sentence])

print(logits)
print(np.argmax(logits))

[[-1.4027362 -1.5702496  5.7927184 -1.575778  -1.0552726 -0.5253772]]
2


In [34]:
start_time = time.time()
sentence = "i hate you"
logits = test_sentence_unit(model, device, tokenizer, [sentence])

print(logits)
print(np.argmax(logits))
print("  Loading took: {:}".format(format_time(time.time() - start_time)))

[[ 1.9511262   0.41871074  0.8441826  -2.3129752   0.46622762 -2.3007205 ]]
0
  Loading took: 0:00:00


- 여러 문장

In [67]:
path2 = './../../data/rotten_tomato/'

In [71]:
merged_df = pd.read_csv(path2+'rotten_rating_review_table.csv')

In [72]:
merged_df.head()

Unnamed: 0,user_id,movie_id,origin_index,rotten_tomatoes_link,critic_name,top_critic,publisher_name,review_type,review_score,review_date,review_content
0,943,0,3,m/0814255,Ben McEachen,False,Sunday Mail (Australia),Fresh,0.7,2010-02-09,Whether audiences will get behind The Lightnin...
1,7242,0,6,m/0814255,Nick Schager,False,Slant Magazine,Rotten,0.25,2010-02-10,Harry Potter knockoffs don't come more transpa...
2,1046,0,7,m/0814255,Bill Goodykoontz,True,Arizona Republic,Fresh,0.7,2010-02-10,"Percy Jackson isn't a great movie, but it's a ..."
3,4895,0,8,m/0814255,Jordan Hoffman,False,UGO,Fresh,0.7,2010-02-10,"Fun, brisk and imaginative"
4,4517,0,9,m/0814255,Jim Schembri,True,The Age (Australia),Fresh,0.6,2010-02-10,"Crammed with dragons, set-destroying fights an..."


In [74]:
len(merged_df)

752664

In [38]:
rating_df = merged_df[['user_id', 'movie_id', 'review_score', 'review_content', 'review_type','review_date', 'critic_name', 'top_critic', 'publisher_name']]

In [39]:
rating_df.head()

Unnamed: 0,user_id,movie_id,review_score,review_content,review_type,review_date,critic_name,top_critic,publisher_name
0,943,0,0.7,Whether audiences will get behind The Lightnin...,Fresh,2010-02-09,Ben McEachen,False,Sunday Mail (Australia)
1,7242,0,0.25,Harry Potter knockoffs don't come more transpa...,Rotten,2010-02-10,Nick Schager,False,Slant Magazine
2,1046,0,0.7,"Percy Jackson isn't a great movie, but it's a ...",Fresh,2010-02-10,Bill Goodykoontz,True,Arizona Republic
3,4895,0,0.7,"Fun, brisk and imaginative",Fresh,2010-02-10,Jordan Hoffman,False,UGO
4,4517,0,0.6,"Crammed with dragons, set-destroying fights an...",Fresh,2010-02-10,Jim Schembri,True,The Age (Australia)


In [75]:
len(rating_df)

752664

In [55]:
import itertools

In [56]:
def test_sentence_many(model, device, tokenizer, sentences):
    start_time = time.time()
    
    # 출력된 label 리스트
    label_list = list() 
    
    # 평가모드로 변경
    model.eval()

    # 문장을 입력 데이터로 변환
    inputs, masks = convert_input_data(tokenizer, sentences)

    # 데이터를 GPU에 넣음
    b_input_ids = inputs.to(device)
    b_input_mask = masks.to(device)
    
    test_data = TensorDataset(b_input_ids, b_input_mask)
    test_dataloader = DataLoader(test_data, batch_size=32)
    
    # 데이터로더에서 배치만큼 반복하여 가져옴
    for step, batch in enumerate(test_dataloader):
        # 경과 정보 표시
        if step % 100 == 0 and not step == 0:
            elapsed = format_time(time.time() - start_time)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(test_dataloader), elapsed))

        # 배치를 GPU에 넣음
        batch = tuple(b.to(device) for b in batch)

        # 배치에서 데이터 추출
        b_input_ids, b_input_mask = batch
    
        # 그래디언트 계산 안함
        with torch.no_grad():     
            # Forward 수행
            outputs = model(b_input_ids, 
                            token_type_ids=None, 
                            attention_mask=b_input_mask)

        # 로스 구함
        logits = outputs[0]

        # CPU로 데이터 이동
        preds = logits.detach().cpu().numpy()
        pred_flat = np.argmax(preds, axis=1).flatten()
        label_list.append(list(pred_flat))
    
    # 이중 리스트를 단일 리스트로 변경
    result = list(itertools.chain.from_iterable(label_list))    
    return result

In [58]:
sentences = rating_df.review_content

start_time = time.time()

# 출력된 label 리스트
label_list = list() 

# 평가모드로 변경
model.eval()

# 문장을 입력 데이터로 변환
inputs, masks = convert_input_data(tokenizer, sentences)

# 데이터를 GPU에 넣음
b_input_ids = inputs.to(device)
b_input_mask = masks.to(device)

test_data = TensorDataset(b_input_ids, b_input_mask)
test_dataloader = DataLoader(test_data, batch_size=32)

# 데이터로더에서 배치만큼 반복하여 가져옴
for step, batch in enumerate(test_dataloader):
    # 경과 정보 표시
    if step % 100 == 0 and not step == 0:
        elapsed = format_time(time.time() - start_time)
        print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(test_dataloader), elapsed))

    # 배치를 GPU에 넣음
    batch = tuple(b.to(device) for b in batch)

    # 배치에서 데이터 추출
    b_input_ids, b_input_mask = batch

    # 그래디언트 계산 안함
    with torch.no_grad():     
        # Forward 수행
        outputs = model(b_input_ids, 
                        token_type_ids=None, 
                        attention_mask=b_input_mask)

    # 로스 구함
    logits = outputs[0]

    # CPU로 데이터 이동
    preds = logits.detach().cpu().numpy()
    pred_flat = np.argmax(preds, axis=1).flatten()
    label_list.append(list(pred_flat))

# 이중 리스트를 단일 리스트로 변경
result = list(itertools.chain.from_iterable(label_list))    

  Batch   100  of  23,521.    Elapsed: 0:05:51.
  Batch   200  of  23,521.    Elapsed: 0:06:22.
  Batch   300  of  23,521.    Elapsed: 0:06:52.
  Batch   400  of  23,521.    Elapsed: 0:07:23.
  Batch   500  of  23,521.    Elapsed: 0:07:54.
  Batch   600  of  23,521.    Elapsed: 0:08:25.
  Batch   700  of  23,521.    Elapsed: 0:08:56.
  Batch   800  of  23,521.    Elapsed: 0:09:27.
  Batch   900  of  23,521.    Elapsed: 0:09:58.
  Batch 1,000  of  23,521.    Elapsed: 0:10:29.
  Batch 1,100  of  23,521.    Elapsed: 0:11:00.
  Batch 1,200  of  23,521.    Elapsed: 0:11:31.
  Batch 1,300  of  23,521.    Elapsed: 0:12:02.
  Batch 1,400  of  23,521.    Elapsed: 0:12:33.
  Batch 1,500  of  23,521.    Elapsed: 0:13:04.
  Batch 1,600  of  23,521.    Elapsed: 0:13:35.
  Batch 1,700  of  23,521.    Elapsed: 0:14:06.
  Batch 1,800  of  23,521.    Elapsed: 0:14:36.
  Batch 1,900  of  23,521.    Elapsed: 0:15:07.
  Batch 2,000  of  23,521.    Elapsed: 0:15:38.
  Batch 2,100  of  23,521.    Elapsed: 0

In [57]:
start_time = time.time()
labels = test_sentence_many(model, device, tokenizer, rating_df.review_content)
print("  Labeling took: {:}".format(format_time(time.time() - start_time)))

KeyboardInterrupt: 

In [61]:
labels = result

In [62]:
len(labels)

752664

In [63]:
rating_df['emotion'] = labels

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_df['emotion'] = labels


In [64]:
rating_df.head()

Unnamed: 0,user_id,movie_id,review_score,review_content,review_type,review_date,critic_name,top_critic,publisher_name,emotion
0,943,0,0.7,Whether audiences will get behind The Lightnin...,Fresh,2010-02-09,Ben McEachen,False,Sunday Mail (Australia),2
1,7242,0,0.25,Harry Potter knockoffs don't come more transpa...,Rotten,2010-02-10,Nick Schager,False,Slant Magazine,2
2,1046,0,0.7,"Percy Jackson isn't a great movie, but it's a ...",Fresh,2010-02-10,Bill Goodykoontz,True,Arizona Republic,2
3,4895,0,0.7,"Fun, brisk and imaginative",Fresh,2010-02-10,Jordan Hoffman,False,UGO,2
4,4517,0,0.6,"Crammed with dragons, set-destroying fights an...",Fresh,2010-02-10,Jim Schembri,True,The Age (Australia),2


In [66]:
rating_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 752664 entries, 0 to 752663
Data columns (total 10 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   user_id         752664 non-null  int64  
 1   movie_id        752664 non-null  int64  
 2   review_score    752664 non-null  float64
 3   review_content  752664 non-null  object 
 4   review_type     752664 non-null  object 
 5   review_date     752664 non-null  object 
 6   critic_name     752664 non-null  object 
 7   top_critic      752664 non-null  bool   
 8   publisher_name  752664 non-null  object 
 9   emotion         752664 non-null  int64  
dtypes: bool(1), float64(1), int64(3), object(5)
memory usage: 52.4+ MB


In [84]:
rating_df.loc[65471]

user_id                                                        4125
movie_id                                                       2210
review_score                                                    0.7
review_content    It's a helluva ride with more laughs than anti...
review_type                                                   Fresh
review_date                                              2018-09-14
critic_name                                           Jared Mobarak
top_critic                                                    False
publisher_name                                          BuffaloVibe
emotion                                                           2
Name: 65471, dtype: object

In [88]:
rating_df.to_excel('./../../data/rotten_tomato/rotten_rating_review_emotion_table.xlsx', index=False)

In [89]:
rating_df2 = pd.read_csv('./../../data/rotten_tomato/rotten_rating_review_sentiment_table.csv')

In [90]:
rating_df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 752664 entries, 0 to 752663
Data columns (total 10 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   user_id         752664 non-null  int64  
 1   movie_id        752664 non-null  int64  
 2   review_score    752664 non-null  float64
 3   review_content  752664 non-null  object 
 4   review_type     752664 non-null  object 
 5   review_date     752664 non-null  object 
 6   critic_name     752664 non-null  object 
 7   top_critic      752664 non-null  bool   
 8   publisher_name  752664 non-null  object 
 9   sentiment       752664 non-null  int64  
dtypes: bool(1), float64(1), int64(3), object(5)
memory usage: 52.4+ MB


In [91]:
rating_df2.to_excel('./../../data/rotten_tomato/rotten_rating_review_sentiment_table.xlsx', index=False)

---
### 7. Pretraining 층으로만 학습

In [None]:
# model_no_finetuning = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=2)
# model_no_finetuning.cuda()

In [None]:
# #시작 시간 설정
# start_time = time.time()

# # 평가모드로 변경
# model_rotten2.eval()

# # 변수 초기화
# eval_loss, eval_accuracy = 0, 0
# nb_eval_steps, nb_eval_examples = 0, 0

# # 데이터로더에서 배치만큼 반복하여 가져옴
# for step, batch in enumerate(test_dataloader):
#     # 경과 정보 표시
#     if step % 100 == 0 and not step == 0:
#         elapsed = format_time(time.time() - start_time)
#         print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(test_dataloader), elapsed))

#     # 배치를 GPU에 넣음
#     batch = tuple(b.to(device) for b in batch)
    
#     # 배치에서 데이터 추출
#     b_input_ids, b_input_mask, b_labels = batch
    
#     # 그래디언트 계산 안함
#     with torch.no_grad():     
#         # Forward 수행
#         outputs = model_rotten2(b_input_ids, 
#                         token_type_ids=None, 
#                         attention_mask=b_input_mask)
    
#     # 로스 구함
#     logits = outputs[0]

#     # CPU로 데이터 이동
#     logits = logits.detach().cpu().numpy()
#     label_ids = b_labels.to('cpu').numpy()
    
#     # 출력 로짓과 라벨을 비교하여 정확도 계산
#     tmp_eval_accuracy = flat_accuracy(logits, label_ids)
#     eval_accuracy += tmp_eval_accuracy

# print("")
# print("Accuracy: {0:.2f}".format(eval_accuracy/len(test_dataloader)))
# print("Test took: {:}".format(format_time(time.time() - start_time)))

---
### 8. 특정 영화에 대한 긍정/부정 리뷰 취합 후 파일로 저장

In [33]:
# df = pd.read_csv(path+'rotten_review_scaled_label.csv')

In [34]:
# df.head()

Unnamed: 0,rotten_tomatoes_link,critic_name,top_critic,publisher_name,review_type,review_score,review_date,review_content,label
0,m/0814255,Ben McEachen,False,Sunday Mail (Australia),Fresh,1.0,2010-02-09,Whether audiences will get behind The Lightnin...,1
1,m/0814255,Nick Schager,False,Slant Magazine,Rotten,0.25,2010-02-10,Harry Potter knockoffs don't come more transpa...,0
2,m/0814255,Bill Goodykoontz,True,Arizona Republic,Fresh,1.0,2010-02-10,"Percy Jackson isn't a great movie, but it's a ...",1
3,m/0814255,Jordan Hoffman,False,UGO,Fresh,0.7,2010-02-10,"Fun, brisk and imaginative",1
4,m/0814255,Mark Adams,False,Daily Mirror (UK),Fresh,0.8,2010-02-10,"This action-packed fantasy adventure, based on...",1


In [38]:
# df_review = df[['rotten_tomatoes_link','review_content', 'label']]

In [92]:
# df_review.head()

In [126]:
# pivot_review = df_review.pivot_table(index='rotten_tomatoes_link', columns='label', aggfunc=len, fill_value=0)

In [137]:
# pivot_review

Unnamed: 0_level_0,review_content,review_content
label,0,1
rotten_tomatoes_link,Unnamed: 1_level_2,Unnamed: 2_level_2
m/+_one_2019,0,33
m/+h,2,2
m/-_man,1,3
m/-cule_valley_of_the_lost_ants,0,5
m/0814255,12,34
...,...,...
m/zoom_2006,22,3
m/zootopia,2,166
m/zorba_the_greek,0,3
m/zulu,0,4


In [145]:
# pivot_review.loc['m/0814255']

                label
review_content  0        12
                1        34
Name: m/0814255, dtype: int64

In [144]:
# pivot_review.loc['m/10000_bc']

                label
review_content  0        48
                1        12
Name: m/10000_bc, dtype: int64

- Movie1: 'm/0814255'

In [102]:
# # 긍정, 부정 리뷰들을 취합 후 파일저장
# def movie_sentiment_filter(df, movie_link, file_name):
#     condition = df['rotten_tomatoes_link'] == movie_link
#     df = df[condition]
    
#     pos_review_list = df[df['label'] == 1].reset_index(drop=True).review_content
#     movie1_pos = " ".join(pos_review_list)
    
#     neg_review_list = df[df['label'] == 0].reset_index(drop=True).review_content
#     movie1_neg = " ".join(neg_review_list)
    
#     f = open(path + f"{file_name}_pos.txt", 'w')
#     f.write(movie1_pos)
#     f.close()
    
#     f = open(path + f"{file_name}_neg.txt", 'w')
#     f.write(movie1_neg)
#     f.close()
    
#     print(f"{file_name}_pos/neg save finish!!")

In [96]:
# movie_sentiment_filter(df=df_review, movie_link='m/0814255', file_name='movie1')

movie1_pos/neg save finish!!


In [104]:
# movie_sentiment_filter(df=df_review, movie_link='m/0878835', file_name='movie2')

movie2_pos/neg save finish!!


In [105]:
# movie_sentiment_filter(df=df_review, movie_link='m/10000_bc', file_name='movie3')

movie3_pos/neg save finish!!
