In [1]:
import pandas as pd

In [2]:
# Load the survey export; later cells use its 'uid', 'ShortSketch' and 'TechPrompt' columns.
prompt_responses = pd.read_csv('../Data_Folder/alzU_responses_only.csv')

In [3]:
# Preview the first rows; unanswered prompts show up as missing values.
prompt_responses.head()

Unnamed: 0,uid,ShortSketch,TechPrompt
0,35256,,
1,35521,Reading and fantastical games provided relief ...,Technology allows me to follow politics events...
2,35170,,
3,35135,Getting my first horse. I remember riding him...,I find social media to be sucking the life our...
4,35137,,We spend way too much time on phones and compu...


### Number of Survey Responses

In [4]:
# Total number of rows in the survey export.
len(prompt_responses)

766

### Number of Question Responders

In [5]:
# Answered ShortSketch prompts only; the original row index is kept.
short_sketch_responses = prompt_responses['ShortSketch'].dropna()

short_sketch_responses.count()

195

In [6]:
# Answered TechPrompt prompts only; the original row index is kept.
tech_prompt_responses = prompt_responses['TechPrompt'].dropna()
tech_prompt_responses.count()

231

### Response Text Statistics (word count, sentence count, lexical diversity)


In [7]:
def lexical_diversity(text):
    """Return the ratio of distinct items to total items in ``text``.

    ``text`` may be any sized iterable. A plain string is iterated character
    by character, so for the raw response strings this notebook passes in,
    the result is a character-level ratio, not a word-level one. Pass
    ``text.split()`` to measure diversity over words instead.

    Returns:
        float in [0, 1]; 0.0 for empty input (instead of raising
        ZeroDivisionError).
    """
    total = len(text)
    if total == 0:
        # Nothing to measure; avoid dividing by zero.
        return 0.0
    # float() keeps true division even under Python 2 integer semantics.
    return len(set(text)) / float(total)

In [8]:
# Whitespace-delimited token count for each ShortSketch response.
sketch_word_counts = short_sketch_responses.str.split().str.len()

print("Mean Word Count: Short Sketch")
print(sketch_word_counts.mean())



Mean Word Count: Short Sketch
49.5897435897


In [9]:
# Whitespace-delimited token count for each TechPrompt response.
tech_word_counts = tech_prompt_responses.str.split().str.len()

print("Mean Word Count: Tech Prompt")
print(tech_word_counts.mean())

Mean Word Count: Tech Prompt
28.619047619


In [10]:
# Whitespace-delimited token count for each ShortSketch response.
sketch_word_counts = short_sketch_responses.str.split().str.len()

print("Variance Word Count: Short Sketch")
print(sketch_word_counts.var())

Variance Word Count: Short Sketch
1357.25350251


In [11]:
# Variance of the per-response word count for the TechPrompt column.
# The label previously read "Tech Crunch", which did not match the column being reported.
print("Variance Word Count: Tech Prompt")
print(tech_prompt_responses.str.split().apply(len).var())

Variance Word Count: Tech Crunch
535.662939959


In [12]:
# Number of period-delimited segments per response.
# NOTE(review): a response ending in "." yields a trailing empty segment, and
# "!" / "?" are not treated as sentence breaks, so this only approximates
# the sentence count.
sketch_segment_counts = short_sketch_responses.str.split(".").str.len()

print("Mean Sentence Count: Short Sketch")
print(sketch_segment_counts.mean())

Mean Sentence Count: Short Sketch
4.33333333333


In [13]:
# Number of period-delimited segments per response.
# NOTE(review): a response ending in "." yields a trailing empty segment, and
# "!" / "?" are not treated as sentence breaks, so this only approximates
# the sentence count.
tech_segment_counts = tech_prompt_responses.str.split(".").str.len()

print("Mean Sentence Count: Tech Prompt")
print(tech_segment_counts.mean())

Mean Sentence Count: Tech Prompt
3.09956709957


In [14]:
# Per-response diversity ratio; each raw response string is passed as-is.
sketch_diversity = short_sketch_responses.map(lexical_diversity)

print("Mean Lexical Diversity: Short Sketch")
print(sketch_diversity.mean())


Mean Lexical Diversity: Short Sketch
0.174661553498


In [15]:
# Per-response diversity ratio; each raw response string is passed as-is.
tech_diversity = tech_prompt_responses.map(lexical_diversity)

print("Mean Lexical Diversity: Tech Prompt")
print(tech_diversity.mean())

Mean Lexical Diversity: Tech Prompt
0.268657552167


#### Merge datasets to find all users with survey responses and CFT Scores

In [16]:
# Lookup table used below to map e-mail addresses ('Email') to user ids ('User ID').
emails_and_ids = pd.read_csv('../Data_Folder/AlzU_ID_and_email.csv')

In [18]:
# Load the CFT results, keep only the join key and the score, and drop
# rows that have no final score.
cft_scores = (
    pd.read_csv('../Data_Folder/CFT_5.11.csv')
    .loc[:, ['EmailAddress', 'FinalScore']]
    .dropna(subset=['FinalScore'])
)

cft_scores.shape

(7422, 2)

In [19]:
# Attach a user id to every score by left-joining on the e-mail address,
# then discard scores whose e-mail has no matching user id.
email_to_user = emails_and_ids.set_index('Email')
scores_with_id = (
    cft_scores.set_index('EmailAddress')
    .join(email_to_user)[['FinalScore', 'User ID']]
    .dropna(subset=['User ID'])
)

scores_with_id.shape

(4933, 2)

In [20]:
# Respondents who answered both free-text prompts.
completed_prompts = prompt_responses.dropna(subset=['ShortSketch', 'TechPrompt'])

# Left-join the scores onto the prompts by user id and keep only rows that
# received a score. The result is indexed by uid; a uid is repeated once per
# matching score row.
scores_by_user = scores_with_id.set_index('User ID')
prompts_and_scores = (
    completed_prompts.set_index('uid')
    .join(scores_by_user)
    .dropna(subset=['FinalScore'])
)

In [21]:
# The frame stays indexed by uid. A bare `reset_index()` used to sit here, but it
# returns a new frame and its result was discarded, so the call had no effect and
# was removed. It is deliberately not assigned back: later cells look rows up by
# uid label.
prompts_and_scores.head()

Unnamed: 0,ShortSketch,TechPrompt,FinalScore
15401,Remember Dad taking us to a ballgame to see th...,My immediate and extended family can communica...,44.18447
15857,I remember when there was a fire in a house up...,My work is computer based so I am on the compu...,61.381639
15898,Playing games and matchbox cars with friends o...,I use social media reluctantly because thatâ€™...,42.728304
15898,Playing games and matchbox cars with friends o...,I use social media reluctantly because thatâ€™...,59.73321
16280,I survived an abusive childhood at the hand of...,Medical advancements based on technology are w...,43.965908


In [22]:
# (rows, columns) of the joined prompt/score table.
prompts_and_scores.shape

(138, 3)

### Find user with highest CFT score

In [23]:
# Label-based lookup of the uid holding the highest FinalScore.
# `.ix` was removed in pandas 1.0, and `Series.argmax` now returns an integer
# position rather than an index label, so `.loc` + `idxmax` is used instead.
# If that uid occurs more than once in the index, every one of its rows is
# returned (as a DataFrame), not just the single highest-scoring row.
top_score = prompts_and_scores.loc[prompts_and_scores['FinalScore'].idxmax()]


In [24]:
# Shows every row that shares the top scorer's uid, so several scores may be listed.
top_score

Unnamed: 0,ShortSketch,TechPrompt,FinalScore
28931,I loved every Friday night when I was growing ...,Technology can make some of my tasks faster an...,60.899469
28931,I loved every Friday night when I was growing ...,Technology can make some of my tasks faster an...,51.814911
28931,I loved every Friday night when I was growing ...,Technology can make some of my tasks faster an...,64.902491


In [25]:
# First ShortSketch entry among the rows selected for the top scorer.
top_sketch = top_score['ShortSketch'].iloc[0]
top_sketch_words = top_sketch.split()
# Period-delimited segments; includes an empty tail when the text ends in ".".
top_sketch_segments = top_sketch.split(".")

print(top_sketch)
print(len(top_sketch_words))
print(len(top_sketch_segments))
print(lexical_diversity(top_sketch))

I loved every Friday night when I was growing up. My mother always made a special meal and no one made any plans to go out. it was family time!
30
3
0.181818181818


In [26]:
# First TechPrompt entry among the rows selected for the top scorer.
top_tech = top_score['TechPrompt'].iloc[0]
top_tech_words = top_tech.split()
# Period-delimited segments; includes an empty tail when the text ends in ".".
top_tech_segments = top_tech.split(".")

print(top_tech)
print(len(top_tech_words))
print(len(top_tech_segments))
print(lexical_diversity(top_tech))

Technology can make some of my tasks faster and easier. I have very minimal involvement in social media so that has no impact on my life.
26
3
0.167883211679


### Find user with lowest CFT score

In [27]:
# Label-based lookup of the uid holding the lowest FinalScore.
# `.ix` was removed in pandas 1.0, and `Series.argmin` now returns an integer
# position rather than an index label, so `.loc` + `idxmin` is used instead.
# A uid that appears only once in the index yields a single row (a Series).
low_score = prompts_and_scores.loc[prompts_and_scores['FinalScore'].idxmin()]

In [28]:
# Display the row selected for the lowest FinalScore.
low_score

ShortSketch    When I was a child we went camping in the Reed...
TechPrompt     Allot. Every day for hours. Sometimes research...
FinalScore                                                 17.07
Name: 31971, dtype: object

In [29]:
# ShortSketch text of the lowest scorer.
low_sketch = low_score['ShortSketch']
low_sketch_words = low_sketch.split()
# Period-delimited segments; includes an empty tail when the text ends in ".".
low_sketch_segments = low_sketch.split(".")

print(low_sketch)
print(len(low_sketch_words))
print(len(low_sketch_segments))
print(lexical_diversity(low_sketch))

When I was a child we went camping in the Reedwoods.  It made a Very big impression. When I think about it I feel like I am there hiking. To me the Redwoods are spiritual. 
35
5
0.162790697674


In [30]:
# TechPrompt text of the lowest scorer.
low_tech = low_score['TechPrompt']
low_tech_words = low_tech.split()
# Period-delimited segments; includes an empty tail when the text ends in ".".
low_tech_segments = low_tech.split(".")

print(low_tech)
print(len(low_tech_words))
print(len(low_tech_segments))
print(lexical_diversity(low_tech))

Allot. Every day for hours. Sometimes research about things that interest me. I want to keep learning. Also,I do shopping on line. I am lost without my iPad. I have Kindle also. When watching TV I try to watch programs I can learn from.  
44
9
0.142857142857
