In [None]:
from textblob import TextBlob
import pandas as pd
import nltk
# Tokenizer models needed by nltk.word_tokenize (used in preprocess_text).
# Newer NLTK releases look up the 'punkt_tab' resource instead of the pickled
# 'punkt' models, so both are fetched to keep the notebook runnable on either
# side of that change.
nltk.download('punkt')
nltk.download('punkt_tab')

In [None]:
# Read the CSV once into `master_df` and work on an independent deep copy
# (`df`), so the frame as loaded stays available for comparison.
file_path = 'sep2022_updated.csv'
master_df = pd.read_csv(filepath_or_buffer=file_path)
df = master_df.copy(deep=True)
df

In [None]:
# Placeholder written into missing cells of the comment columns.
replacement_value = 'n/a'

# Only columns whose name starts with 'Comment' are touched; every other
# column keeps its missing values.
for col in df.columns:
    if not col.startswith('Comment'):
        continue
    df[col] = df[col].fillna(replacement_value)

df

In [None]:
def preprocess_text(text):
    """Lower-case a string and split it into tokens.

    Args:
        text: Value to tokenize. Only ``str`` instances are processed.

    Returns:
        The list produced by ``nltk.word_tokenize`` on the lower-cased text,
        or an empty list when ``text`` is not a string.
    """
    # Guard clause: anything that is not a string yields no tokens.
    if not isinstance(text, str):
        return []
    lowered = text.lower()
    return nltk.word_tokenize(lowered)

# Tokenize from the untouched `master_df` columns rather than from `df`
# itself. preprocess_text returns [] for anything that is not a string, so
# reading from `df` would replace the token lists with empty lists when this
# cell is run a second time. Sourcing from `master_df` keeps it idempotent.
for column in ['y', 'z', 'aa']:
    df[column] = master_df[column].apply(preprocess_text)

df

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

def clean_text(text):
    """Return ``text`` unchanged when it is a string, otherwise ``""``.

    Args:
        text: Any value.

    Returns:
        ``text`` itself for ``str`` input; the empty string for everything else.
    """
    return text if isinstance(text, str) else ""

# Flatten each tokenized column to one token per row; clean_text maps any
# non-string entry to "" so the join below only ever sees strings.
cleaned_text_y, cleaned_text_z, cleaned_text_aa = (
    df[name].explode().apply(clean_text) for name in ('y', 'z', 'aa')
)

# One word cloud per column, built from the space-joined tokens.
wordcloud_y, wordcloud_z, wordcloud_aa = (
    WordCloud(width=800, height=600, background_color='white').generate(' '.join(tokens))
    for tokens in (cleaned_text_y, cleaned_text_z, cleaned_text_aa)
)

plt.figure(figsize=(30, 15))

panels = [
    (wordcloud_y, 'Word Cloud - y'),
    (wordcloud_z, 'Word Cloud - z'),
    (wordcloud_aa, 'Word Cloud - aa'),
]
for position, (cloud, caption) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)  # plot number `position` in a 1-row, 3-column grid
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.title(caption)
plt.show()


In [None]:
# Fetch the NLTK stopword corpus and load the English word list.
nltk.download('stopwords')
lst_stopwords = nltk.corpus.stopwords.words("english")
# Membership is tested once per token, so use a set for constant-time lookups
# instead of scanning the list each time. `lst_stopwords` itself is kept as a
# list for any other code that reads it.
stopword_lookup = frozenset(lst_stopwords)

# Work on a copy so `df` keeps the unfiltered tokens for comparison.
df_clean = df.copy()

# Drop stopwords from each token list and store the survivors as one
# space-separated string per row.
for column in ['y', 'z', 'aa']:
    df_clean[column] = df_clean[column].apply(
        lambda tokens: ' '.join(word for word in tokens if word not in stopword_lookup)
    )

display(df.head(10))
display(df_clean.head(10))

In [None]:
def preprocess_text(text):
    """Tokenize the lower-cased form of a string; non-strings give ``[]``.

    NOTE(review): a function with the same name and behaviour is already
    defined in an earlier cell; this definition shadows it.

    Args:
        text: Value to tokenize. Only ``str`` instances are processed.

    Returns:
        ``nltk.word_tokenize(text.lower())`` for string input, otherwise an
        empty list.
    """
    return nltk.word_tokenize(text.lower()) if isinstance(text, str) else []

# Turn the space-joined, stopword-filtered strings back into token lists.
# Values that are already lists are passed through untouched: preprocess_text
# returns [] for non-strings, so without this guard a second run of the cell
# would silently replace every token list with an empty one.
for column in ['y', 'z', 'aa']:
    df_clean[column] = df_clean[column].apply(
        lambda value: value if isinstance(value, list) else preprocess_text(value)
    )

def clean_text(text):
    """Pass strings through unchanged and replace anything else with ``""``.

    NOTE(review): a function with the same name and behaviour is already
    defined in an earlier cell; this definition shadows it.

    Args:
        text: Any value.

    Returns:
        ``text`` for ``str`` input, otherwise the empty string.
    """
    if not isinstance(text, str):
        return ""
    return text

# Flatten each stopword-filtered column to one token per row; clean_text maps
# any non-string entry to "" so the join below only ever sees strings.
cleaned_text_y_new, cleaned_text_z_new, cleaned_text_aa_new = (
    df_clean[name].explode().apply(clean_text) for name in ('y', 'z', 'aa')
)

# One word cloud per column, built from the space-joined tokens.
wordcloud_y_new, wordcloud_z_new, wordcloud_aa_new = (
    WordCloud(width=800, height=600, background_color='white').generate(' '.join(tokens))
    for tokens in (cleaned_text_y_new, cleaned_text_z_new, cleaned_text_aa_new)
)

plt.figure(figsize=(30, 15))

panels_new = [
    (wordcloud_y_new, 'Word Cloud - y stopword'),
    (wordcloud_z_new, 'Word Cloud - z stopword'),
    (wordcloud_aa_new, 'Word Cloud - aa stopword'),
]
for position, (cloud, caption) in enumerate(panels_new, start=1):
    plt.subplot(1, 3, position)  # plot number `position` in a 1-row, 3-column grid
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.title(caption)
plt.show()

In [None]:
# Fetch the VADER lexicon and import NLTK's VADER sentiment analyzer.
# NOTE(review): SentimentIntensityAnalyzer is not referenced in the cells
# visible here (polarity is computed with TextBlob) — confirm it is still needed.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
# Display the stopword-filtered frame before the sentiment columns are added.
df_clean

In [None]:
# Add a TextBlob polarity column for each stopword-filtered text column.
# Each row's token list is re-joined with spaces before it is scored.
for column in ("y", "z", "aa"):
    df_clean[f"{column}_sentiment"] = df_clean[column].apply(
        lambda tokens: TextBlob(' '.join(tokens)).sentiment.polarity
    )

display(df_clean.head(10))

In [None]:
# Add a TextBlob polarity column for each tokenized column of `df` (the
# version that still contains stopwords). Each row's token list is re-joined
# with spaces before it is scored.
for column in ("y", "z", "aa"):
    df[f"{column}_sentiment"] = df[column].apply(
        lambda tokens: TextBlob(' '.join(tokens)).sentiment.polarity
    )

display(df.head(10))

In [None]:
# Write both frames to CSV without the index column: `df` (unfiltered tokens)
# and `df_clean` (stopword-filtered tokens).
for frame, destination in ((df, 'output.csv'), (df_clean, 'output_clean.csv')):
    frame.to_csv(destination, index=False)