# Black Task

In [1]:
import re
import nltk
import csv
import pandas as pd
import math
import sys

## main function

In [2]:
def main():
    """Read ``input.csv`` and print one English sentence per change in the data.

    The file is read without a header row. Cell (0, 0) holds the subject
    phrase; every following row holds a time-period label in column 0 and an
    integer value in column 1. The input is validated first; if any error is
    reported the process exits with status 1 so callers can detect the failure.
    """
    upVerbs = ["went up","increased","rose"]
    downVerbs = ["went down","decreased","fell"]

    # Read every cell as text so validation can inspect the raw input;
    # empty cells are replaced by the sentinel string 'missing'.
    df = pd.read_csv("input.csv",header=None,dtype=str)
    df = df.fillna('missing')

    # The validator has already printed the details; exit with a non-zero
    # status (a bare sys.exit() would report success).
    if NumberOfError(df) > 0:
        sys.exit(1)

    # Alternative ways of naming the subject, cycled through below.
    nouns = ExtractNouns(df)

    # Values (column 1) and their period labels (column 0), header row skipped.
    P = [int(i) for i in df.iloc[1:,1]]
    M = [i for i in df.iloc[1:,0]]

    # Extremes are taken over the full series, before it is compressed.
    maxValue = max(P)
    minValue = min(P)

    # Drop intermediate points that merely continue the current trend.
    P,M = RemoveSameTrend(P,M)

    cntEven = 0   # number of "no change" sentences produced so far
    flagMax = 0   # becomes 1 once the maximum has been mentioned
    flagMin = 0   # becomes 1 once the minimum has been mentioned

    # Walk over consecutive pairs of points.
    for i, (P1, P2, M1, M2) in enumerate(zip(P, P[1:], M, M[1:])):
        noun = nouns[i % len(nouns)]

        if P1 == P2:
            # The value does not change over this interval.
            sentence = GenerateEven(cntEven,noun,P1,M1,M2)
            cntEven += 1
        else:
            # Index each verb list by its own length so the two lists may
            # differ in size without raising IndexError.
            verbs = upVerbs if P1 < P2 else downVerbs
            verb = verbs[i % len(verbs)]
            sentence = GenerateSentence(noun,verb,P1,P2,M1,M2,maxValue,minValue,flagMax,flagMin,i)

            # Remember that an extreme has been mentioned so later sentences
            # use the short wording.
            if maxValue in [P1,P2]:
                flagMax = 1
            if minValue in [P1,P2]:
                flagMin = 1

        print(sentence)

## error detection

In [3]:
def NumberOfError(df):
    """Validate the input table, print every problem found and return the count.

    Checks performed:
      * cell (0, 0), the subject, must not be the 'missing' sentinel;
      * for every later row, columns 0 and 1 must not be 'missing';
      * column 0 must not consist only of digits (a time period is expected);
      * column 1 must consist only of digits (a number is expected).

    When at least one problem exists, a summary line followed by one line per
    problem is printed. The return value is the number of problems.
    """
    problems = []

    # The subject lives in the very first cell.
    if df.at[0,0] == 'missing':
        problems.append('Row 0, column 0 : Subject is missing')

    # Inspect both columns of every data row, in row-major order.
    for row in range(1,len(df)):
        for col in (0,1):
            cell = df.at[row,col]
            if cell == 'missing':
                reason = 'Value is missing'
            elif col == 0 and cell.isdigit():
                reason = 'Please enter a time period'
            elif col == 1 and not cell.isdigit():
                reason = 'Please enter a number'
            else:
                continue
            problems.append(f'Row {row}, column {col} : {reason}')

    # Report only when there is something to report.
    total = len(problems)
    if total:
        plural = "s" if total > 1 else ""
        print(f'{total} error{plural} found')
        for line in problems:
            print(line)

    return total

### example

In [4]:
# Load the error sample (input_error.csv) as text; empty cells become 'missing'.
df_ex = pd.read_csv("input_error.csv",header=None,dtype=str).fillna('missing')

# Validate first (this prints the error report), then show the returned count.
errCount_ex = NumberOfError(df_ex)
print("The return value is",errCount_ex)

5 errors found
Row 0, column 0 : Subject is missing
Row 2, column 0 : Please enter a time period
Row 2, column 1 : Please enter a number
Row 3, column 0 : Value is missing
Row 3, column 1 : Value is missing
The return value is 5


## extract nouns using nltk

In [5]:
def ExtractNouns(df):
    """Build the list of phrases that may be used to refer to the subject.

    The subject text is read from cell (0, 0), tokenized and part-of-speech
    tagged with nltk. The result always starts with the full subject followed
    by "They"; then, for each noun token in order, "The <noun>" is added,
    followed by "It" for a singular tag (NN, NNP) or "They" for a plural tag
    (NNS, NNPS).
    """
    subject = df.at[0,0]
    taggedWords = nltk.pos_tag(nltk.word_tokenize(subject))

    singularTags = ('NN','NNP')
    pluralTags = ('NNS','NNPS')

    phrases = [subject,"They"]
    for word, tag in taggedWords:
        if tag in singularTags:
            phrases.extend(["The " + word,"It"])
        elif tag in pluralTags:
            phrases.extend(["The " + word,"They"])

    return phrases

### example

In [6]:
#Prepare to reading csv file.
df_ex = pd.read_csv("input.csv",header=None,dtype=str)
df_ex = df_ex.fillna('missing')

nouns_ex = ExtractNouns(df_ex)
print("subject :",df_ex.at[0,0])
print("nouns :",nouns_ex)

subject : The number of bananas
nouns : ['The number of bananas', 'They', 'The number', 'It', 'The bananas', 'They']


## remove same trend

In [7]:
def RemoveSameTrend(P,M):
    """Keep only the points where the trend of the series changes.

    An interior point is dropped when the change leading into it and the
    change leading out of it have the same sign (the rise or fall simply
    continues), or when both changes are zero (the middle of a flat run).
    The first and last points are always kept.

    Args:
        P: sequence of numeric values.
        M: sequence of labels, one per value in ``P``.

    Returns:
        A tuple ``(newP, newM)`` of new lists holding the retained values and
        their labels. The inputs are not modified.
    """
    # Fewer than two points: there is no trend to compress. Returning early
    # avoids an IndexError on empty input and avoids emitting the only point
    # of a one-element series twice.
    if len(P) < 2:
        return list(P), list(M)

    newP = [P[0]]
    newM = [M[0]]
    for i in range(1,len(P)-1):
        before = P[i] - P[i-1]
        after = P[i+1] - P[i]
        continuesTrend = before * after > 0
        insideFlatRun = before == 0 and after == 0
        if continuesTrend or insideFlatRun:
            continue
        newP.append(P[i])
        newM.append(M[i])

    newP.append(P[-1])
    newM.append(M[-1])

    return newP,newM

### example

In [8]:
# A rise, a flat stretch and a fall: only the turning points should survive.
P_ex1 = [1, 2, 3, 3, 3, 2, 1]
M_ex1 = list("abcdefg")
P_ex2, M_ex2 = RemoveSameTrend(P_ex1, M_ex1)

print("before remove :", P_ex1, M_ex1)
print("after remove :", P_ex2, M_ex2)

before remove : [1, 2, 3, 3, 3, 2, 1] ['a', 'b', 'c', 'd', 'e', 'f', 'g']
after remove : [1, 3, 3, 1] ['a', 'c', 'e', 'g']


## generate sentence when trends are the same

In [9]:
def GenerateEven(cntEven,noun,P1,M1,M2):
    """Return a sentence stating that the value stayed at P1 from M1 to M2.

    The wording rotates with ``cntEven`` (taken modulo 3) between
    "remained stable at", "remained steady at" and "evened out".

    Args:
        cntEven: how many such sentences were produced before this one.
        noun: phrase used as the subject of the sentence.
        P1: the unchanged value; converted with ``str``.
        M1: label of the period where the flat stretch starts.
        M2: label of the period where the flat stretch ends.
    """
    # Remainders 0 and 1 have their own phrase; anything else falls back
    # to the third wording.
    wording = {0: " remained stable at ", 1: " remained steady at "}
    phrase = wording.get(cntEven % 3, " evened out ")

    return noun + phrase + str(P1) + " from " + M1 + " to " + M2 + "."

### example

In [10]:
# Four consecutive calls show the three wordings and the wrap-around.
for cnt_ex in range(4):
    line_ex = GenerateEven(cnt_ex,"The number","100","jan","Mar")
    print("cntEven =",cnt_ex,", return value :",line_ex)

cntEven = 0 , return value : The number remained stable at 100 from jan to Mar.
cntEven = 1 , return value : The number remained steady at 100 from jan to Mar.
cntEven = 2 , return value : The number evened out 100 from jan to Mar.
cntEven = 3 , return value : The number remained stable at 100 from jan to Mar.


## generate sentence

In [11]:
def _DescribeValue(value,maxValue,minValue,flagMax,flagMin):
    """Return the text for one value plus the updated max/min flags.

    A value equal to the maximum becomes "a high of <value>" while flagMax is
    0 and "its peak" otherwise; a value equal to the minimum becomes
    "a low of <text>" while flagMin is 0 and "its low" otherwise. The maximum
    is checked before the minimum.
    """
    text = str(value)

    if value == maxValue:
        if flagMax == 0:
            text = "a high of " + text
            flagMax = 1
        else:
            text = "its peak"

    if value == minValue:
        if flagMin == 0:
            text = "a low of " + text
            flagMin = 1
        else:
            text = "its low"

    return text,flagMax,flagMin


def GenerateSentence(noun,verb,P1,P2,M1,M2,maxValue,minValue,flagMax,flagMin,i):
    """Return a sentence describing the change from P1 (at M1) to P2 (at M2).

    Args:
        noun: phrase used as the subject of the sentence.
        verb: verb phrase for the change, chosen by the caller.
        P1: value at the start of the interval.
        P2: value at the end of the interval.
        M1: label of the starting period.
        M2: label of the ending period.
        maxValue: value to be described as the high / peak.
        minValue: value to be described as the low.
        flagMax: 0 if the maximum has not been mentioned yet, otherwise the
            short form "its peak" is used.
        flagMin: 0 if the minimum has not been mentioned yet, otherwise the
            short form "its low" is used.
        i: sentence index; selects the sentence structure (``i % 4``) and the
            way the periods and values are appended (``i % 5``).

    Returns:
        The generated sentence. The flags are only updated locally, so the
        caller has to keep track of them itself.
    """
    isIncrease = P1 < P2
    diff = abs(P1 - P2)

    # Relative change with respect to the starting value. When P1 is not
    # positive the rate stays at 1, which avoids dividing by zero.
    rate = 1
    if P1 > 0:
        rate = float(diff) / float(P1)

    if rate <= 0.05:
        adj = "slight"
    elif rate >= 1:
        adj = "significant"
    else:
        adj = "substantial"

    # Every fourth sentence (i % 4 == 3) uses the "There was a ..." form;
    # the others use "<noun> <verb> <adjective>ly".
    if i % 4 == 3:
        sentence = "There was a " + adj + (" increase" if isIncrease else " decrease")
    else:
        sentence = noun + " " + verb + " " + adj + "ly"

    # Text for both endpoints. The start value is handled first so that a
    # flag it sets is already visible when the end value is described.
    S1,flagMax,flagMin = _DescribeValue(P1,maxValue,minValue,flagMax,flagMin)
    S2,flagMax,flagMin = _DescribeValue(P2,maxValue,minValue,flagMax,flagMin)

    # Append the periods and values; the layout rotates with i % 5.
    layout = i % 5
    if layout == 0:
        sentence += " from " + S1 + " in " + M1 + " to " + S2 + " in " + M2 + "."
    elif layout == 1:
        sentence += " from " + S1 + " in " + M1 +" by " + str(diff) + " to " + S2 + " in " + M2 + "."
    elif layout == 2:
        sentence += " in " + M1 + " by " + str(diff) + " to " + S2 + " in " + M2 + "."
    elif layout == 3:
        sentence += " from " + S1 + " in " + M1 +" by " + str(diff)
        # The end value is only spelled out when it starts with 'a', i.e. when
        # it reads "a high of ..." or "a low of ...".
        if S2[0] == 'a':
            sentence += " to " + S2
        sentence += " in " + M2 + "."
    else:
        # NOTE(review): unlike the other layouts this one ends without a full
        # stop and lower-cases the whole sentence (including the noun). Kept
        # as-is to preserve the existing output; confirm whether intended.
        sentence = "From " + M1 + " to " + M2 + ", " + sentence.lower() + " to " + S2

    return sentence

### example : expression max/min

In [12]:
# Each tuple is (maxValue, minValue, flagMax, flagMin); everything else is fixed
# so only the wording for the high / low changes between the printed lines.
for max_ex, min_ex, flagMax_ex, flagMin_ex in [
    (1000, 0, 0, 0),
    (200, 0, 0, 0),
    (200, 0, 1, 0),
    (1000, 100, 0, 0),
    (1000, 100, 0, 1),
]:
    print(GenerateSentence("The number","rose",100,200,"Jun","Mar",max_ex,min_ex,flagMax_ex,flagMin_ex,0))

The number rose significantly from 100 in Jun to 200 in Mar.
The number rose significantly from 100 in Jun to a high of 200 in Mar.
The number rose significantly from 100 in Jun to its peak in Mar.
The number rose significantly from a low of 100 in Jun to 200 in Mar.
The number rose significantly from its low in Jun to 200 in Mar.


### example : expression of period and expression of passive

In [13]:
# Indices 0-4 cover every period layout and the "There was a ..." form.
for idx_ex in range(5):
    line_ex = GenerateSentence("The number","rose",100,200,"Jun","Mar",1000,0,0,0,idx_ex)
    print("i =",idx_ex,":",line_ex)

i = 0 : The number rose significantly from 100 in Jun to 200 in Mar.
i = 1 : The number rose significantly from 100 in Jun by 100 to 200 in Mar.
i = 2 : The number rose significantly in Jun by 100 to 200 in Mar.
i = 3 : There was a significant increase from 100 in Jun by 100 in Mar.
i = 4 : From Jun to Mar, the number rose significantly to 200


## execute the main function

In [14]:
# Run the whole pipeline: reads input.csv and prints the generated sentences.
main()

The number of bananas went up significantly from a low of 1 in Jan to 600 in Mar.
They remained stable at 600 from Mar to Apr.
The number fell substantially in Apr by 590 to 10 in Jul.
It remained steady at 10 from Jul to Aug.
From Aug to Sep, the bananas increased significantly to a high of 1000
They fell substantially from its peak in Sep to its low in Oct.
The number of bananas went up significantly from its low in Oct by 99 to 100 in Nov.
There was a substantial decrease in Nov by 50 to 50 in Dec.
