In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the training annotations table from the competition input directory.
train_csv_path = "../input/feedback-prize-2021/train.csv"
df = pd.read_csv(train_csv_path)

In [None]:
# (rows, columns) of the loaded training table
df.shape

In [None]:
# Column dtypes, non-null counts and memory usage of the training table
df.info()

In [None]:
# Peek at the first two rows
df.head(2)

In [None]:
# Distinct discourse_type labels, in order of first appearance
df['discourse_type'].unique().tolist()

In [None]:
# Number of rows per discourse_type label (value_counts sorts most frequent first)
df['discourse_type'].value_counts()

In [None]:
def bar_plot(df, x, y, color, get_width, get_x, y_lim, rotation, title, xlabel, ylabel, annot_size, xticks_size, legend):
    """Draw a vertical seaborn bar chart of ``df[y]`` per ``df[x]`` and show it.

    Every bar is annotated with its height as a percentage of ``sum(df[y])``.

    Parameters
    ----------
    df : DataFrame holding the columns named by ``x`` and ``y``.
    x, y : column names used for the categories and the bar heights.
    color : palette handed to ``sns.barplot``.
    get_width : offset added to a bar's height to place its label vertically.
    get_x : offset added to a bar's left edge to place its label horizontally.
    y_lim : value passed to ``plt.ylim``.
    rotation : rotation of the x tick labels.
    title, xlabel, ylabel : figure title and axis labels.
    annot_size : font size of the percentage labels.
    xticks_size : font size of the x tick labels.
    legend : accepted for call compatibility; not used by the function body.
    """
    heading_style = {'weight': 'bold', 'color': '#1b515e'}

    fig, ax = plt.subplots(figsize=(12, 7), dpi=80)
    ax = sns.barplot(x=df[x], y=df[y], palette=color)

    # Annotate each bar with its share of the column total.
    total = sum(df[y])
    for bar in ax.patches:
        height = bar.get_height()
        share = str(round((height / total) * 100, 2)) + '%'
        ax.text(bar.get_x() + get_x, height + get_width, share,
                fontsize=annot_size, weight='bold', color='black')

    plt.title(title, size=20, **heading_style)
    plt.xlabel(xlabel, fontsize=15, **heading_style)
    plt.ylabel(ylabel, fontsize=15, **heading_style)
    plt.xticks(rotation=rotation, fontsize=xticks_size)
    plt.yticks(fontsize=12)
    plt.ylim(y_lim)

    # Bottom/left spines are recoloured white; right/top spines are hidden.
    for side in ('bottom', 'left'):
        spine = ax.spines[side]
        spine.set_color('white')
        spine.set_linewidth(1.5)
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

    # Keep the horizontal grid lines behind the bars.
    ax.set_axisbelow(True)
    ax.grid(color='#b2d6c7', linewidth=1, axis='y', alpha=.3)

    plt.show()

In [None]:
# Rows per discourse_type as a two-column frame: ['discourse_type', 'Users'].
# rename_axis + reset_index(name=...) names both columns explicitly, so the result
# does not depend on how value_counts() labels its index and values (that labelling
# changed in pandas 2.0, which breaks a rename keyed on 'index').
q1_df = (
    df['discourse_type']
    .value_counts()
    .rename_axis('discourse_type')
    .reset_index(name='Users')
)

In [None]:
# Show the per-type count table that feeds the bar chart
q1_df

In [None]:
# Share of rows per discourse type, drawn as a vertical bar chart.
bar_plot(
    q1_df,
    x='discourse_type',
    y='Users',
    color='rainbow',
    get_width=100,
    get_x=-.05,
    y_lim=(0, 60000),
    rotation=0,
    title='\n Different discourse_type\n',
    xlabel='\n discourse_type\n',
    ylabel='\n Users\n',
    annot_size=10,
    xticks_size=13,
    legend='discourse_type with Maximum\n no. of Users',
)

In [None]:
# List the column names of the training table
df.columns

In [None]:
# Whitespace-delimited word count of every discourse segment.
df['len_word'] = [len(segment.split()) for segment in df['discourse_text']]

In [None]:
# One sub-frame per discourse type, selected with a boolean mask on the label column.
discourse_kind = df["discourse_type"]
df_lead = df[discourse_kind == 'Lead']
df_claim = df[discourse_kind == 'Claim']
df_evidence = df[discourse_kind == 'Evidence']
df_position = df[discourse_kind == 'Position']
df_concluding = df[discourse_kind == 'Concluding Statement']
df_rebuttal = df[discourse_kind == 'Rebuttal']
df_counterclaim = df[discourse_kind == 'Counterclaim']

In [None]:
# Lead: mean and distribution of words per segment
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; kept as-is here.
lead_lengths = df_lead['len_word']
print("mean of word lengths: ", lead_lengths.mean())
plt.figure(figsize=(5, 5))
sns.distplot(lead_lengths, color="b")
plt.show()

In [None]:
# Claim: mean and distribution of words per segment
claim_lengths = df_claim['len_word']
print("mean of word lengths: ", claim_lengths.mean())
plt.figure(figsize=(5, 5))
sns.distplot(claim_lengths, color="b")
plt.show()

In [None]:
# Evidence: mean and distribution of words per segment
evidence_lengths = df_evidence['len_word']
print("mean of word lengths: ", evidence_lengths.mean())
plt.figure(figsize=(5, 5))
sns.distplot(evidence_lengths, color="b")
plt.show()

In [None]:
# Position: mean and distribution of words per segment
position_lengths = df_position['len_word']
print("mean of word lengths: ", position_lengths.mean())
plt.figure(figsize=(5, 5))
sns.distplot(position_lengths, color="b")
plt.show()

In [None]:
# Concluding Statement: mean and distribution of words per segment
concluding_lengths = df_concluding['len_word']
print("mean of word lengths: ", concluding_lengths.mean())
plt.figure(figsize=(5, 5))
sns.distplot(concluding_lengths, color="b")
plt.show()

In [None]:
# Rebuttal: mean and distribution of words per segment
rebuttal_lengths = df_rebuttal['len_word']
print("mean of word lengths: ", rebuttal_lengths.mean())
plt.figure(figsize=(5, 5))
sns.distplot(rebuttal_lengths, color="b")
plt.show()

In [None]:
# Counterclaim: mean and distribution of words per segment
counterclaim_lengths = df_counterclaim['len_word']
print("mean of word lengths: ", counterclaim_lengths.mean())
plt.figure(figsize=(5, 5))
sns.distplot(counterclaim_lengths, color="b")
plt.show()

In [None]:
def horizontal_bar_plot(df, x, y, color, get_width, get_y, x_lim, x_spine, plot_size, title, xlabel, ylabel, annot_size, legend, legend_loc):
    """Draw a seaborn bar chart of ``df[x]`` per ``df[y]`` with percentage labels.

    Every bar is annotated with its width as a percentage of ``sum(df[x])``.
    The function does not call ``plt.show()`` itself.

    Parameters
    ----------
    df : DataFrame holding the columns named by ``x`` and ``y``.
    x, y : column names passed to ``sns.barplot`` as x and y.
    color : palette handed to ``sns.barplot``.
    get_width : offset added to a bar's width to place its label horizontally.
    get_y : offset added to a bar's y position to place its label vertically.
    x_lim : value passed to ``plt.xlim``.
    x_spine : bounds passed to the bottom spine's ``set_bounds``.
    plot_size : figure size passed to ``plt.subplots``.
    title, xlabel, ylabel : figure title and axis labels.
    annot_size : font size of the percentage labels.
    legend, legend_loc : accepted for call compatibility; not used by the function body.
    """
    heading_style = {'weight': 'bold', 'color': '#1b515e'}

    fig, ax = plt.subplots(figsize=plot_size, dpi=80)
    ax = sns.barplot(x=df[x], y=df[y], palette=color)

    # Annotate each bar with its share of the column total.
    total = sum(df[x])
    for bar in ax.patches:
        label_x = bar.get_width() + get_width
        label_y = bar.get_y() + get_y
        share = '{:.2f}%'.format(bar.get_width()*100/total)
        plt.text(label_x, label_y, share, ha='center', va='center',
                 fontsize=annot_size, color='black', weight='bold')

    plt.title(title, size=20, **heading_style)
    plt.xlabel(xlabel, fontsize=15, **heading_style)
    plt.ylabel(ylabel, fontsize=15, **heading_style)
    plt.xticks(fontsize=13)
    plt.yticks(fontsize=12)
    plt.xlim(x_lim)

    # Only the bottom spine stays visible, clipped to the requested bounds.
    for side in ('top', 'left', 'right'):
        ax.spines[side].set_visible(False)

    ax.set_axisbelow(True)
    ax.spines['bottom'].set_bounds(x_spine)
    ax.grid(color='#b2d6c7', linewidth=1, axis='y', alpha=.3)

In [None]:
# Rows per discourse_type_num label, most frequent first, limited to the top 15.
# rename_axis + reset_index(name=...) names both columns explicitly, so the frame is
# ['discourse_type_num', 'Users'] regardless of how value_counts() labels its output
# (a rename keyed on 'index' stops working from pandas 2.0 and would leave the
# 'discourse_type_num' column missing for the plot below).
q5_df = (
    df['discourse_type_num']
    .value_counts()
    .rename_axis('discourse_type_num')
    .reset_index(name='Users')
    .head(15)
)

horizontal_bar_plot(q5_df, 'Users', 'discourse_type_num', 'winter', 10, 0.8, (0, 15000), 
                    (0, 15000), (10, 8), '\n Different discourse_type_num Users\n', 
                    '\nUsers', 'discourse_type_num', 15, 'Role with maximum Records', 'best')

In [None]:
# Mean discourse_start / discourse_end per discourse type, ordered by mean start.
mean_span = (
    df.groupby('discourse_type')[['discourse_start', 'discourse_end']]
    .mean()
    .sort_values('discourse_start')
)
ax = mean_span.plot.barh(figsize=(10, 5))
ax.set_title('Average Discourse Label Start and End', fontsize=16)
plt.show()

In [None]:
# Space-join all discourse_text segments that share an id and broadcast the joined
# string back onto every row of that id (so the value repeats within an essay).
df['full_text'] = df.groupby('id')['discourse_text'].transform(lambda segments: ' '.join(segments))

In [None]:
# Joined text carried by the first row of the table
df.full_text.iloc[0]

# Text length

In [None]:
# Character length of each distinct joined text.
unique_full_texts = df['full_text'].drop_duplicates()
text_length = unique_full_texts.map(len)

In [None]:
# Histogram of text_length using 100 bins.
fig = plt.figure(figsize=(10, 8))
ax1 = text_length.plot.hist(color="#120f7a", bins=100)
ax1.set(title='Essay Length Distribution', xlabel="Essay Length", ylabel="Frequency")

plt.show()

# Word Count

In [None]:
# Whitespace-delimited word count of each distinct joined text, shown as a 100-bin histogram.
distinct_texts = df['full_text'].drop_duplicates()
word_count = distinct_texts.map(lambda essay: len(str(essay).split()))

fig = plt.figure(figsize=(10, 8))
ax1 = word_count.plot.hist(color="#120f7a", bins=100)
ax1.set(title='Word Count Distribution', xlabel="Word Count", ylabel="Frequency")

plt.show()

# Distribution of top n-grams for full-text essays

In [None]:
from nltk.corpus import stopwords
from wordcloud import WordCloud, STOPWORDS
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
def get_top_n_words(corpus, n=None, remove_stop_words=False, n_words=1): # if n_words=1 -> unigrams, if n_words=2 -> bigrams..
    """Return the most frequent n-grams of ``corpus`` with their total counts.

    Parameters
    ----------
    corpus : iterable of str
        Documents to count n-grams in.
    n : int or None
        Number of leading entries to return; ``None`` returns every n-gram.
    remove_stop_words : bool
        When true, scikit-learn's built-in ``'english'`` stop-word list is applied.
    n_words : int
        Size of the n-grams (1 = unigrams, 2 = bigrams, ...).

    Returns
    -------
    list of (str, count) tuples sorted by count, highest first.
    """
    vec = CountVectorizer(
        stop_words='english' if remove_stop_words else None,
        ngram_range=(n_words, n_words),
    )
    # fit_transform learns the vocabulary and builds the count matrix in one pass,
    # so the corpus is tokenised once and one-shot iterables are not exhausted
    # between a separate fit() and transform().
    bag_of_words = vec.fit_transform(corpus)
    # Column sums give the corpus-wide count of every vocabulary entry.
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    words_freq.sort(key=lambda pair: pair[1], reverse=True)
    return words_freq[:n]

# Unigram

In [None]:
# Top 20 stop-word-filtered unigrams over the distinct joined texts.
unigram_corpus = df['full_text'].drop_duplicates()
common_words = get_top_n_words(unigram_corpus, n=20, remove_stop_words=True, n_words=1)
for ngram, total in common_words:
    print(ngram, total)

In [None]:
# Tabulate the top unigrams and chart them from most to least frequent.
df_tmp = pd.DataFrame(common_words, columns=['text', 'count'])
unigram_totals = df_tmp.groupby('text')['count'].sum().sort_values(ascending=False)

fig = plt.figure(figsize=(10, 8))
ax1 = unigram_totals.plot.bar(color="#120f7a")
ax1.set(title='Unigram Distribution', xlabel="Unigrams", ylabel="Frequency")

plt.show()

# Bigram

In [None]:
# Top 20 stop-word-filtered bigrams over the distinct joined texts.
bigram_corpus = df['full_text'].drop_duplicates()
common_words = get_top_n_words(bigram_corpus, n=20, remove_stop_words=True, n_words=2)
for ngram, total in common_words:
    print(ngram, total)

In [None]:
# Tabulate the top bigrams and chart them from most to least frequent.
df_tmp = pd.DataFrame(common_words, columns=['text', 'count'])
bigram_totals = df_tmp.groupby('text')['count'].sum().sort_values(ascending=False)

fig = plt.figure(figsize=(10, 8))
ax1 = bigram_totals.plot.bar(color="#120f7a")
ax1.set(title='Bigram Distribution', xlabel="Bigrams", ylabel="Frequency")

plt.show()

# Trigram

In [None]:
# Top 20 stop-word-filtered trigrams over the distinct joined texts.
trigram_corpus = df['full_text'].drop_duplicates()
common_words = get_top_n_words(trigram_corpus, n=20, remove_stop_words=True, n_words=3)
for ngram, total in common_words:
    print(ngram, total)

In [None]:
# Tabulate the top trigrams and chart them from most to least frequent.
df_tmp = pd.DataFrame(common_words, columns=['text', 'count'])
trigram_totals = df_tmp.groupby('text')['count'].sum().sort_values(ascending=False)

fig = plt.figure(figsize=(10, 8))
ax1 = trigram_totals.plot.bar(color="#120f7a")
ax1.set(title='Trigram Distribution', xlabel="Trigrams", ylabel="Frequency")

plt.show()

# Distribution of top n-grams for each discourse type

# Unigram

In [None]:
# Segment texts of each discourse type.
text_Lead = df.loc[df['discourse_type'] == 'Lead', 'discourse_text'].values
text_Position = df.loc[df['discourse_type'] == 'Position', 'discourse_text'].values
text_Evidence = df.loc[df['discourse_type'] == 'Evidence', 'discourse_text'].values
text_Claim = df.loc[df['discourse_type'] == 'Claim', 'discourse_text'].values
text_Concluding_Statement = df.loc[df['discourse_type'] == 'Concluding Statement', 'discourse_text'].values
text_Counterclaim = df.loc[df['discourse_type'] == 'Counterclaim', 'discourse_text'].values
text_Rebuttal = df.loc[df['discourse_type'] == 'Rebuttal', 'discourse_text'].values

# Top 20 stop-word-filtered unigrams per discourse type.
unigram_opts = dict(n=20, remove_stop_words=True, n_words=1)
common_words_Lead = get_top_n_words(text_Lead, **unigram_opts)
common_words_Position = get_top_n_words(text_Position, **unigram_opts)
common_words_Evidence = get_top_n_words(text_Evidence, **unigram_opts)
common_words_Claim = get_top_n_words(text_Claim, **unigram_opts)
common_words_Concluding_Statement = get_top_n_words(text_Concluding_Statement, **unigram_opts)
common_words_Counterclaim = get_top_n_words(text_Counterclaim, **unigram_opts)
common_words_Rebuttal = get_top_n_words(text_Rebuttal, **unigram_opts)

In [None]:
# Per-type tables of the top unigrams (these names are deleted by a later cleanup cell).
df_tmp_Lead = pd.DataFrame(common_words_Lead, columns=['text', 'count'])
df_tmp_Position = pd.DataFrame(common_words_Position, columns=['text', 'count'])
df_tmp_Evidence = pd.DataFrame(common_words_Evidence, columns=['text', 'count'])
df_tmp_Claim = pd.DataFrame(common_words_Claim, columns=['text', 'count'])
df_tmp_Concluding_Statement = pd.DataFrame(common_words_Concluding_Statement, columns=['text', 'count'])
df_tmp_Counterclaim = pd.DataFrame(common_words_Counterclaim, columns=['text', 'count'])
df_tmp_Rebuttal = pd.DataFrame(common_words_Rebuttal, columns=['text', 'count'])

# (label, table) pairs in display order; they are drawn two per figure, side by side,
# so the last figure holds a single left-hand panel.
unigram_panels = [
    ('Lead', df_tmp_Lead),
    ('Position', df_tmp_Position),
    ('Evidence', df_tmp_Evidence),
    ('Claim', df_tmp_Claim),
    ('Concluding Statement', df_tmp_Concluding_Statement),
    ('Counterclaim', df_tmp_Counterclaim),
    ('Rebuttal', df_tmp_Rebuttal),
]

for first in range(0, len(unigram_panels), 2):
    fig = plt.figure(figsize=(15, 6))
    for position, (label, frame) in enumerate(unigram_panels[first:first + 2], start=1):
        ax = fig.add_subplot(1, 2, position)
        totals = frame.groupby('text')['count'].sum().sort_values(ascending=False)
        # Pass the target axes explicitly instead of relying on the current axes.
        totals.plot(kind='bar', color="#120f7a", ax=ax)
        ax.set_title(label + ' Unigram Distribution')
        ax.set_xlabel("Unigrams")
        ax.set_ylabel("Frequency")

plt.show()

# Bigram

In [None]:
# Segment texts of each discourse type.
text_Lead = df.loc[df['discourse_type'] == 'Lead', 'discourse_text'].values
text_Position = df.loc[df['discourse_type'] == 'Position', 'discourse_text'].values
text_Evidence = df.loc[df['discourse_type'] == 'Evidence', 'discourse_text'].values
text_Claim = df.loc[df['discourse_type'] == 'Claim', 'discourse_text'].values
text_Concluding_Statement = df.loc[df['discourse_type'] == 'Concluding Statement', 'discourse_text'].values
text_Counterclaim = df.loc[df['discourse_type'] == 'Counterclaim', 'discourse_text'].values
text_Rebuttal = df.loc[df['discourse_type'] == 'Rebuttal', 'discourse_text'].values

# Top 20 stop-word-filtered bigrams per discourse type.
bigram_opts = dict(n=20, remove_stop_words=True, n_words=2)
common_words_Lead = get_top_n_words(text_Lead, **bigram_opts)
common_words_Position = get_top_n_words(text_Position, **bigram_opts)
common_words_Evidence = get_top_n_words(text_Evidence, **bigram_opts)
common_words_Claim = get_top_n_words(text_Claim, **bigram_opts)
common_words_Concluding_Statement = get_top_n_words(text_Concluding_Statement, **bigram_opts)
common_words_Counterclaim = get_top_n_words(text_Counterclaim, **bigram_opts)
common_words_Rebuttal = get_top_n_words(text_Rebuttal, **bigram_opts)

In [None]:
# Per-type tables of the top bigrams (these names are deleted by a later cleanup cell).
df_tmp_Lead = pd.DataFrame(common_words_Lead, columns=['text', 'count'])
df_tmp_Position = pd.DataFrame(common_words_Position, columns=['text', 'count'])
df_tmp_Evidence = pd.DataFrame(common_words_Evidence, columns=['text', 'count'])
df_tmp_Claim = pd.DataFrame(common_words_Claim, columns=['text', 'count'])
df_tmp_Concluding_Statement = pd.DataFrame(common_words_Concluding_Statement, columns=['text', 'count'])
df_tmp_Counterclaim = pd.DataFrame(common_words_Counterclaim, columns=['text', 'count'])
df_tmp_Rebuttal = pd.DataFrame(common_words_Rebuttal, columns=['text', 'count'])

# (label, table) pairs in display order; they are drawn two per figure, side by side,
# so the last figure holds a single left-hand panel.
bigram_panels = [
    ('Lead', df_tmp_Lead),
    ('Position', df_tmp_Position),
    ('Evidence', df_tmp_Evidence),
    ('Claim', df_tmp_Claim),
    ('Concluding Statement', df_tmp_Concluding_Statement),
    ('Counterclaim', df_tmp_Counterclaim),
    ('Rebuttal', df_tmp_Rebuttal),
]

for first in range(0, len(bigram_panels), 2):
    fig = plt.figure(figsize=(15, 6))
    for position, (label, frame) in enumerate(bigram_panels[first:first + 2], start=1):
        ax = fig.add_subplot(1, 2, position)
        totals = frame.groupby('text')['count'].sum().sort_values(ascending=False)
        # Pass the target axes explicitly instead of relying on the current axes.
        totals.plot(kind='bar', color="#120f7a", ax=ax)
        ax.set_title(label + ' Bigram Distribution')
        ax.set_xlabel("Bigrams")
        ax.set_ylabel("Frequency")

plt.show()

# Trigram

In [None]:
# Segment texts of each discourse type.
text_Lead = df.loc[df['discourse_type'] == 'Lead', 'discourse_text'].values
text_Position = df.loc[df['discourse_type'] == 'Position', 'discourse_text'].values
text_Evidence = df.loc[df['discourse_type'] == 'Evidence', 'discourse_text'].values
text_Claim = df.loc[df['discourse_type'] == 'Claim', 'discourse_text'].values
text_Concluding_Statement = df.loc[df['discourse_type'] == 'Concluding Statement', 'discourse_text'].values
text_Counterclaim = df.loc[df['discourse_type'] == 'Counterclaim', 'discourse_text'].values
text_Rebuttal = df.loc[df['discourse_type'] == 'Rebuttal', 'discourse_text'].values

# Top 20 stop-word-filtered trigrams per discourse type.
trigram_opts = dict(n=20, remove_stop_words=True, n_words=3)
common_words_Lead = get_top_n_words(text_Lead, **trigram_opts)
common_words_Position = get_top_n_words(text_Position, **trigram_opts)
common_words_Evidence = get_top_n_words(text_Evidence, **trigram_opts)
common_words_Claim = get_top_n_words(text_Claim, **trigram_opts)
common_words_Concluding_Statement = get_top_n_words(text_Concluding_Statement, **trigram_opts)
common_words_Counterclaim = get_top_n_words(text_Counterclaim, **trigram_opts)
common_words_Rebuttal = get_top_n_words(text_Rebuttal, **trigram_opts)


In [None]:
# Per-type tables of the top trigrams (these names are deleted by a later cleanup cell).
df_tmp_Lead = pd.DataFrame(common_words_Lead, columns=['text', 'count'])
df_tmp_Position = pd.DataFrame(common_words_Position, columns=['text', 'count'])
df_tmp_Evidence = pd.DataFrame(common_words_Evidence, columns=['text', 'count'])
df_tmp_Claim = pd.DataFrame(common_words_Claim, columns=['text', 'count'])
df_tmp_Concluding_Statement = pd.DataFrame(common_words_Concluding_Statement, columns=['text', 'count'])
df_tmp_Counterclaim = pd.DataFrame(common_words_Counterclaim, columns=['text', 'count'])
df_tmp_Rebuttal = pd.DataFrame(common_words_Rebuttal, columns=['text', 'count'])

# (label, table) pairs in display order; they are drawn two per figure, side by side,
# so the last figure holds a single left-hand panel.
trigram_panels = [
    ('Lead', df_tmp_Lead),
    ('Position', df_tmp_Position),
    ('Evidence', df_tmp_Evidence),
    ('Claim', df_tmp_Claim),
    ('Concluding Statement', df_tmp_Concluding_Statement),
    ('Counterclaim', df_tmp_Counterclaim),
    ('Rebuttal', df_tmp_Rebuttal),
]

for first in range(0, len(trigram_panels), 2):
    fig = plt.figure(figsize=(15, 6))
    for position, (label, frame) in enumerate(trigram_panels[first:first + 2], start=1):
        ax = fig.add_subplot(1, 2, position)
        totals = frame.groupby('text')['count'].sum().sort_values(ascending=False)
        # Pass the target axes explicitly instead of relying on the current axes.
        totals.plot(kind='bar', color="#120f7a", ax=ax)
        ax.set_title(label + ' Trigram Distribution')
        ax.set_xlabel("Trigrams")
        ax.set_ylabel("Frequency")

plt.show()

In [None]:
# Drop the per-type n-gram tables and frequency lists from the kernel namespace.
del (df_tmp_Lead, df_tmp_Position, df_tmp_Evidence, df_tmp_Claim,
     df_tmp_Concluding_Statement, df_tmp_Counterclaim, df_tmp_Rebuttal)
del (common_words_Lead, common_words_Position, common_words_Evidence, common_words_Claim,
     common_words_Concluding_Statement, common_words_Counterclaim, common_words_Rebuttal)