# How to determine whether or not 20 million people from a larger population like a particular movie idea?

## Purpose of the document

This document describes an algorithm for estimating whether at least 20 million people (out of a much larger population) will like a particular idea, based on a poll of 10-100 people.


In [23]:
import math
import scipy.stats
from IPython.core.display import HTML

def bin_prob2(x, n, p, min_successes=1):
    """Return the probability of between ``min_successes`` and ``x`` successes.

    Sums the probability mass function of a ``Binomial(n, p)`` distribution
    over ``k = min_successes, ..., x`` (both ends inclusive).

    Args:
        x: Largest number of successes included in the sum.
        n: Number of trials.
        p: Probability of success in a single trial.
        min_successes: Smallest number of successes included in the sum.
            The default of 1 leaves out the "zero successes" outcome, so
            the result is ``P(1 <= X <= x)``. Pass 0 to obtain the
            cumulative probability ``P(X <= x)``.

    Returns:
        The summed probability, or ``0.0`` when ``x < min_successes``.
    """
    distribution = scipy.stats.binom(n, p)
    total_p = 0.0
    # range() excludes its stop value, hence x + 1 so that k == x is counted.
    for k in range(min_successes, x + 1):
        total_p += distribution.pmf(k)
    return total_p

In [24]:
def render_table(required_approval_pop, pop_size, sample_size):
    """Build an HTML table with one row per possible count of positive answers.

    Args:
        required_approval_pop: Required number of people in the population
            who like the logline (e.g. 20 million).
        pop_size: Population size (e.g. 140 million).
        sample_size: Size of the sample (number of respondents).

    Returns:
        A string holding a ``<table>`` element. The first column lists
        ``x = 1 .. sample_size``; the second column shows, formatted as a
        percentage with two decimals, the value of
        ``bin_prob2(x, sample_size, required_approval_pop / pop_size)``.
    """
    probability_heading = (
        "Probability that " + str(required_approval_pop)
        + " out " + str(pop_size) + " will like it"
    )
    header_row = (
        "<tr><th>Number of people liking the logline</th><th>"
        + probability_heading
        + "</th></tr>"
    )

    # One template per data row: the count, then the probability as a percent.
    row_template = "<tr><td>{0}</td><td>{1:.2%}</td></tr>"
    body_rows = [
        row_template.format(
            likes,
            bin_prob2(likes, sample_size, required_approval_pop / pop_size),
        )
        for likes in range(1, sample_size + 1)
    ]

    return "<table>" + header_row + "".join(body_rows) + "</table>"

## Example 1: Sample of 100 people, total population 140 million

In [25]:
HTML(render_table(required_approval_pop=20000000, pop_size=140000000, sample_size=100))

Number of people liking the logline,Probability that 20000000 out 140000000 will like it
1,0.00%
2,0.00%
3,0.02%
4,0.08%
5,0.27%
6,0.79%
7,1.95%
8,4.18%
9,8.00%
10,13.78%


## Example 2: Sample of 10 people, total population 140 million

In [26]:
HTML(render_table(required_approval_pop=20000000, pop_size=140000000, sample_size=10))

Number of people liking the logline,Probability that 20000000 out 140000000 will like it
1,35.68%
2,62.43%
3,74.33%
4,77.79%
5,78.49%
6,78.58%
7,78.59%
8,78.59%
9,78.59%
10,78.59%


## Example 3: Sample of 100 people, total population 330 million

In [27]:
HTML(render_table(required_approval_pop=20000000, pop_size=330000000, sample_size=100))

Number of people liking the logline,Probability that 20000000 out 330000000 will like it
1,1.24%
2,5.21%
3,13.58%
4,26.67%
5,42.88%
6,59.44%
7,73.79%
8,84.55%
9,91.64%
10,95.81%


## Example 4: Sample of 10 people, total population 330 million

In [28]:
HTML(render_table(required_approval_pop=20000000, pop_size=330000000, sample_size=10))

Number of people liking the logline,Probability that 20000000 out 330000000 will like it
1,34.53%
2,44.55%
3,46.27%
4,46.47%
5,46.48%
6,46.48%
7,46.48%
8,46.48%
9,46.48%
10,46.48%


## Example 5: Sample of 20 people, total population 330 million

In [29]:
HTML(render_table(required_approval_pop=20000000, pop_size=330000000, sample_size=20))

Number of people liking the logline,Probability that 20000000 out 330000000 will like it
1,36.95%
2,59.60%
3,68.37%
4,70.77%
5,71.27%
6,71.35%
7,71.36%
8,71.36%
9,71.36%
10,71.36%


# Links

 * https://www.mathsisfun.com/data/binomial-distribution.html
 * https://gist.github.com/fbrundu/746ea5aee0b73a2ddd26