## Problem Statement (UseCase)
A movie review site needs to create an in memory store, to create their data and perform operations on it. You are asked to create a dummy model with 100 users and 1000 movies to explain how it will work

### Tasks to be performed:
1. Generate 1000 movie IDs starting from 1301 
2. Create a matrix, movies_matrix, to store users rating such that 
> There are 100 users<br>
> Each user can review as many movies as he/she wants<br>
> The review should be in between 0 to 10 (both inclusive)<br>
> Movie not reviewed will have value -1<br>
   
3. We have ten movie experts, let us take their reviews also. Also, 50 new movies have to be added and their reviews 

4. Create, final_movie_rating matrix with four columns i.e., ‘Movie ID’, ‘Average rating’, ‘Number of ratings’, and ‘Standard deviation of ratings’ 

5. Also convert the average movie ratings to have range from 0 to 10, such that the minimum rating converts to 0 and maximum to 10, and the other values in between 

6. Display the films rating-wise, highest to lowest


In [1]:
import numpy as np
np.set_printoptions(formatter={'float_kind':'{:.2f}'.format})

### Numpy array basics

In [None]:
mylist1 = [5, 23, 14, 2]
mylist2 = [[1,2,3],[4,5,6],[7,8,9]]
print(mylist1)
print(mylist2)

[5, 23, 14, 2]
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]


In [None]:
mylist2[1][2]

6

In [None]:
myvalue = mylist2[1][2] * 35/2
print(myvalue)

105.0


In [None]:
a1 = np.array(mylist1)
print(a1)

[ 5 23 14  2]


In [None]:
print('[', a1[0], ',', a1[1], ',', a1[2], ',', a1[3], ']')

[ 5 , 23 , 14 , 2 ]


In [None]:
print(type(a1))

<class 'numpy.ndarray'>


In [None]:
print(mylist1)

[5, 23, 14, 2]


In [None]:
print(mylist2)

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]


In [None]:
a2 = np.array(mylist2)
print(a2)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [None]:
type(mylist2), type(a2)

(list, numpy.ndarray)

In [None]:
print(a1.shape)
print(a2.shape)

(4,)
(3, 3)


In [None]:
a3 = np.zeros((3,4))
print(a3)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [None]:
a3 = np.zeros((3,4), dtype=float)
print(a3)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [None]:
a4 = np.zeros((5,8), dtype=int)
print(a4)

[[0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0]]


In [None]:
a5 = np.ones((4,5), dtype=float)
print(a5)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


In [None]:
a6 = np.array([3,2,6,4,5])
print(a6)
print(a6.shape)
print(type(a6))

[3 2 6 4 5]
(5,)
<class 'numpy.ndarray'>


In [None]:
a6 = np.array([[3,2,6],[5,2,4],[4,5,8],[11,5,78]])
print(a6)
print(a6.shape)
print(type(a6))

[[ 3  2  6]
 [ 5  2  4]
 [ 4  5  8]
 [11  5 78]]
(4, 3)
<class 'numpy.ndarray'>


In [None]:
a6 = np.arange(22,34)
print(a6)
print(a6.shape)
print(type(a6))

[22 23 24 25 26 27 28 29 30 31 32 33]
(12,)
<class 'numpy.ndarray'>


### Task 1: Create a Numpy array of 1000 movie IDs starting from 1301

In [None]:
movie_id = np.arange(1301, 2301)
print(movie_id.shape)
print(movie_id[0:15])

(1000,)
[1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314
 1315]


In [None]:
print(movie_id[0])
print(movie_id[5])
print(movie_id[10])

1301
1306
1311


In [None]:
movie_id[10:20]

array([1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320])

In [None]:
print(movie_id[50:70])

[1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364
 1365 1366 1367 1368 1369 1370]


In [None]:
print(a2)
print(a2.shape)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
(3, 3)


In [None]:
a2[0:2, 1:3]

array([[2, 3],
       [5, 6]])

In [None]:
a2[0:1]

array([[1, 2, 3]])

In [None]:
a2 = np.array([[1,2,3,4,5], [2,3,4,5,6], [3,4,5,6,7],[4,5,6,7,8]])
print(a2.shape)
print(a2)

(4, 5)
[[1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]
 [4 5 6 7 8]]


In [None]:
a2[2:4, 1:3]

array([[4, 5],
       [5, 6]])

In [None]:
a2[:, 1]

array([2, 3, 4, 5])

### Task 2: Create a matrix, movies_matrix, to store users rating such that:
- There are 100 users
- Each user can review as many movies as he/she wants
- The review should be in between 0 to 10 (both inclusive)
- Movie not reviewed will have value -1

In [None]:
b1 = np.full(10, 5)
print(b1)

[5 5 5 5 5 5 5 5 5 5]


In [None]:
b2 = np.full((5,8), 25)
print(b2)
print(type(b2))
print(b2.shape)

[[25 25 25 25 25 25 25 25]
 [25 25 25 25 25 25 25 25]
 [25 25 25 25 25 25 25 25]
 [25 25 25 25 25 25 25 25]
 [25 25 25 25 25 25 25 25]]
<class 'numpy.ndarray'>
(5, 8)


In [None]:
movie_matrix = np.full((100,1000), -1)
print(movie_matrix)
print(movie_matrix.shape)

[[-1 -1 -1 ... -1 -1 -1]
 [-1 -1 -1 ... -1 -1 -1]
 [-1 -1 -1 ... -1 -1 -1]
 ...
 [-1 -1 -1 ... -1 -1 -1]
 [-1 -1 -1 ... -1 -1 -1]
 [-1 -1 -1 ... -1 -1 -1]]
(100, 1000)


In [None]:
# get the reivews for one user (say user 0) and initialize to -1
numMovies = 1000
movies_rated_by_a_user = np.full(numMovies, -1)
print(movies_rated_by_a_user.shape)
print(movies_rated_by_a_user[0:50])

(1000,)
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1]


In [None]:
import random
num_movies_rated = random.randint(0, numMovies-1)
print(num_movies_rated)

354


In [None]:
movies_that_user_rates = random.sample(range(numMovies), num_movies_rated)
print(len(movies_that_user_rates))
print(movies_that_user_rates)

354
[506, 304, 917, 278, 307, 773, 483, 636, 894, 397, 855, 293, 391, 863, 742, 613, 679, 615, 456, 882, 841, 829, 912, 497, 395, 277, 702, 518, 604, 184, 369, 822, 554, 701, 427, 631, 879, 832, 36, 738, 948, 108, 548, 784, 487, 575, 697, 21, 475, 616, 431, 928, 854, 991, 454, 371, 694, 179, 287, 770, 943, 857, 776, 507, 564, 619, 28, 643, 686, 569, 760, 887, 561, 508, 657, 5, 920, 930, 341, 971, 450, 513, 965, 862, 804, 414, 74, 723, 222, 319, 59, 305, 851, 680, 938, 812, 652, 676, 539, 258, 529, 37, 505, 848, 537, 651, 492, 259, 833, 891, 317, 843, 6, 898, 988, 38, 362, 850, 745, 11, 127, 122, 998, 286, 588, 583, 131, 584, 159, 23, 55, 469, 181, 68, 781, 565, 374, 200, 103, 298, 207, 43, 958, 521, 962, 332, 827, 327, 15, 836, 861, 348, 932, 996, 380, 495, 544, 360, 650, 419, 102, 644, 2, 444, 502, 198, 256, 212, 271, 385, 578, 205, 722, 303, 533, 762, 60, 929, 744, 820, 825, 596, 490, 858, 788, 276, 821, 687, 405, 749, 182, 476, 964, 654, 113, 299, 800, 870, 896, 873, 443, 641, 145, 

In [None]:
print(len(movies_rated_by_a_user))
print(movies_rated_by_a_user[0:25])
print(num_movies_rated)
print(len(movies_that_user_rates))
print(movies_that_user_rates[0:25])

1000
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1]
354
354
[506, 304, 917, 278, 307, 773, 483, 636, 894, 397, 855, 293, 391, 863, 742, 613, 679, 615, 456, 882, 841, 829, 912, 497, 395]


In [None]:
# in movies_rated_by_a_user, we will update the user0's rating for each movie. These movies that he rates are given by
# the list movies_that_user_rates

# movies_rated_by_a_user[506] = random.randint(0, 10)
# movies_rated_by_a_user[304] = random.randint(0,10)
#...do this for all 354 movies that user0 is rating

for i  in movies_that_user_rates:
    movies_rated_by_a_user[i] = random.randint(0, 10)

print(movies_rated_by_a_user)

In [None]:
import random
# get the reivews for one user (say user 0) and initialize to -1
numMovies = 1000
movies_rated_by_a_user = np.full(numMovies, -1)
print('Initial movies rated by a user:---------------------------------------')
print(movies_rated_by_a_user[0:50])

num_movies_rated = random.randint(0, numMovies-1)
print('Number of movies rated by this user:---------------------------------------')
print(num_movies_rated)

movies_that_user_rates = random.sample(range(numMovies), num_movies_rated)
print('Indexes of movies that this user rates:---------------------------------------')
print(movies_that_user_rates)

for i  in movies_that_user_rates:
    movies_rated_by_a_user[i] = random.randint(0, 10)
print('Random ratings of this user:---------------------------------------')
print(movies_rated_by_a_user)

In [None]:
import random
def createMovieMatrix(numUsers, numMovies):
    movie_matrix = []
    for user in range(numUsers):
        movies_rated_by_a_user = np.full(numMovies, -1)
        num_movies_rated = random.randint(0, numMovies-1)
        movies_that_user_rates = random.sample(range(numMovies), num_movies_rated)
        for i  in movies_that_user_rates:
            movies_rated_by_a_user[i] = random.randint(0, 10)
        movie_matrix.append(movies_rated_by_a_user)
    movie_matrix = np.array(movie_matrix)
    return movie_matrix

In [None]:
numMovies = 1000
numUsers = 100
movie_matrix = createMovieMatrix(numUsers, numMovies)
print('Movie matrix details:')
print('Shape:', movie_matrix.shape)
print('10,10 slice of movie_matrix:')
print(movie_matrix[41:50, 400:410])

Movie matrix details:
Shape: (100, 1000)
10,10 slice of movie_matrix:
[[ 0  1 -1 -1 -1  0 -1 -1  1 -1]
 [-1  4 -1  5 -1  5 -1 -1  7 -1]
 [ 1  8  2 -1 -1  0 -1  4  2  1]
 [-1 -1  9 -1 -1  4 -1 -1 -1  0]
 [-1 -1  8  2 -1 -1 10 -1  0 -1]
 [-1 -1 -1  4  2  9 -1 -1  2  5]
 [ 0 -1 -1 -1 -1 -1 -1 -1 -1 -1]
 [ 4  1  4 10  1  2  8  4  2  5]
 [ 2  5  3  7  8  9  2  1  8  7]]


### Task 3:
- We have ten movie experts, let us take their reviews also
- Also, 50 new movies have to be added and their reviews

In [None]:
expert_matrix = createMovieMatrix(10, 1000)
print('Expert matrix details:')
print(expert_matrix.shape)
print(expert_matrix[3:6, 200:210])

Expert matrix details:
(10, 1000)
[[10 -1  9  7  3 -1  6  4 -1 -1]
 [ 3 -1 -1 -1  5  6  9 -1 -1  7]
 [-1  1  0 -1 -1 -1 -1 -1 -1 -1]]


In [None]:
movie_matrix = np.vstack([movie_matrix, expert_matrix])
print(movie_matrix.shape)
print(movie_matrix[50:60, 600:610])

(110, 1000)
[[ 7  4  5  2  8  5 10 -1  3  0]
 [ 3  8 -1 -1 -1  5 -1  4 -1  0]
 [-1 -1 -1 -1 -1  1 -1 -1 -1 -1]
 [ 5  0  4  5 -1  2  7  8  7 -1]
 [ 1 10  2  1  5 -1 -1  5 -1  9]
 [ 4  7  6  8  0  2 10  0  9  3]
 [ 6  4  9  3  6  2 10  8 10  8]
 [ 6  9  2  1  0  6  2  1  8 10]
 [ 0  3  1  9 10  6 -1  4  2  7]
 [-1 -1 -1 -1 -1  1  4 -1 -1 -1]]


In [None]:
new_movies_matrix = createMovieMatrix(110, 50)
print('New movie matrix details:')
print(new_movies_matrix.shape)
print(new_movies_matrix[3:6, 20:30])

New movie matrix details:
(110, 50)
[[-1  7 -1 -1  5 -1 -1  0 10  0]
 [ 3  8  4  3 -1 -1  5  9  0  7]
 [-1 -1 -1 -1 -1 -1 -1  8 -1  8]]


In [None]:
movie_matrix = np.hstack([movie_matrix, new_movies_matrix])
movie_matrix.shape

(110, 1050)

In [2]:
import random
def createMovieMatrix(numUsers, numMovies):
    movie_matrix = []
    for user in range(numUsers):
        movies_rated_by_a_user = np.full(numMovies, -1)
        num_movies_rated = random.randint(0, numMovies-1)
        movies_that_user_rates = random.sample(range(numMovies), num_movies_rated)
        for i  in movies_that_user_rates:
            movies_rated_by_a_user[i] = random.randint(0, 10)
        movie_matrix.append(movies_rated_by_a_user)
    movie_matrix = np.array(movie_matrix)
    return movie_matrix

movie_matrix = createMovieMatrix(100, 1000)
expert_matrix = createMovieMatrix(10, 1000)
new_movie_matrix = createMovieMatrix(110, 50)
movie_matrix = np.vstack([movie_matrix, expert_matrix])
movie_matrix = np.hstack([movie_matrix, new_movie_matrix])
print(movie_matrix.shape)
movie_id = np.arange(1301, 2350)

(110, 1050)


### Statistical Functions in Numpy

In [None]:
a = np.array([[5,4,6], [1,3,9], [7,2,8]])
print(a)

[[5 4 6]
 [1 3 9]
 [7 2 8]]


In [None]:
print('Minimum value of the array:', np.min(a))
print('Minimum value of each column:', np.min(a, 0))
print('Minimum value of each row:', np.min(a, 1))

Minimum value of the array: 1
Minimum value of each column: [1 2 6]
Minimum value of each row: [4 1 2]


In [None]:
print('Maximum value of the array:', np.max(a))
print('Maximum value of each column:', np.max(a, 0))
print('Maximum value of each row:', np.max(a, 1))

Maximum value of the array: 9
Maximum value of each column: [7 4 9]
Maximum value of each row: [6 9 8]


In [None]:
print('Mean of array a:', a.mean())
print('Std Dev of array a:', a.std())

Mean of array a: 5.0
Std Dev of array a: 2.581988897471611


In [None]:
print('Mean of array a:', a.mean())
print('Std Dev of array a:', round(a.std(),2))

Mean of array a: 5.0
Std Dev of array a: 2.58


### Task 4: Create average user rating matrix using movie_matrix

In [None]:
movie_matrix.shape

(110, 1050)

In [None]:
col = 165
m = movie_matrix[:, col]
print(m)

[-1 -1  0 -1 -1 -1 10  0  6  4  9 -1  1 -1  8  1 -1 -1  1 -1 -1 -1  9  0
 -1 -1  9  1  7 -1 -1  1 -1 -1 -1  3  7 -1  6 -1  5  4 -1 -1 -1  5 -1  2
  1  4 -1 -1  2 -1  0 -1  6  4 -1  2 -1 -1  4 -1  7  5  3 -1 -1 -1  4 -1
 -1 -1  7  0  6 -1 -1 -1  6 -1 -1 10 -1  0 -1 -1  3 -1  8  9 -1  2 -1 -1
  4 -1  9 -1  7 -1  4 10  2  2 -1 -1  1 -1]


In [None]:
m.mean()   # this is NOT CORRECt...because we have not filtered out unrated movies (with -1)

1.5818181818181818

#### Filtering

In [None]:
m = m[m >= 0]
print(m)

[ 0 10  0  6  4  9  1  8  1  1  9  0  9  1  7  1  3  7  6  5  4  5  2  1
  4  2  0  6  4  2  4  7  5  3  4  7  0  6  6 10  0  3  8  9  2  4  9  7
  4 10  2  2  1]


In [None]:
print(m.shape)
print(movie_matrix.shape)
mmshape = movie_matrix.shape
print(mmshape[0])
mmshape[1]
print(m.shape[0])

(53,)
(110, 1050)
110
53


In [None]:
col = 165
m = movie_matrix[:, col]
m = m[m >= 0]
# we need 4 values for each movie
print('Average:', round(m.mean(),2))
print('Std Dev:', round(m.std(),2))
print('No of ratings:', m.shape[0])
print('No of ratings:', m.size)
print('Movie id:', movie_id[col])

Average: 4.88
Std Dev: 2.66
No of ratings: 52
No of ratings: 52
Movie id: 1466


In [3]:
# for each movie in movie_matrix, we have to calculate the above 4 values
# and store that as row in the average ratings array
movie_id = np.arange(1301, 2351)
movie_stats = []
for col in range(1050):
    m = movie_matrix[:, col]
    m = m[m >= 0]
    movie_stats.append([movie_id[col], round(m.mean(),2), m.shape[0], round(m.std(),2)])
print(movie_stats)
movie_stats = np.array(movie_stats)
print(movie_stats.shape)
print(movie_stats[:10, :])


[[1301, 5.31, 62, 3.2], [1302, 5.02, 61, 3.12], [1303, 5.21, 68, 3.03], [1304, 5.07, 54, 3.17], [1305, 5.02, 65, 2.96], [1306, 5.46, 57, 3.13], [1307, 4.72, 60, 2.99], [1308, 5.2, 66, 3.22], [1309, 4.8, 56, 2.87], [1310, 5.07, 60, 3.0], [1311, 4.11, 64, 3.54], [1312, 4.52, 61, 3.23], [1313, 4.89, 66, 3.22], [1314, 5.32, 56, 2.92], [1315, 5.67, 64, 3.21], [1316, 4.55, 51, 3.36], [1317, 5.07, 55, 3.26], [1318, 5.04, 55, 3.24], [1319, 4.93, 56, 3.4], [1320, 4.77, 66, 3.22], [1321, 5.13, 60, 3.31], [1322, 4.95, 59, 3.41], [1323, 4.76, 55, 3.13], [1324, 5.15, 60, 2.95], [1325, 5.6, 53, 3.18], [1326, 4.59, 63, 3.26], [1327, 4.42, 64, 2.96], [1328, 4.47, 59, 3.15], [1329, 5.66, 61, 3.17], [1330, 5.11, 57, 3.46], [1331, 5.12, 68, 3.29], [1332, 5.0, 65, 3.23], [1333, 4.69, 55, 3.26], [1334, 4.53, 68, 3.11], [1335, 5.27, 55, 3.03], [1336, 4.97, 66, 2.91], [1337, 4.74, 58, 3.51], [1338, 5.29, 65, 3.28], [1339, 4.44, 71, 3.3], [1340, 5.1, 60, 2.83], [1341, 4.83, 59, 3.03], [1342, 5.12, 52, 3.02], 

In [None]:
# movie_id = np.arange(1301, 2351)
# movie_stats = np.zeros((1050,4), dtype='float')
# for movie in range(1050):
#     m = movie_matrix[:, movie]
#     m = m[m >= 0]
#     movie_stats[movie, 0] = movie_id[movie]
#     movie_stats[movie, 1] =  round(m.mean(),2)
#     movie_stats[movie, 2] = m.shape[0]
#     movie_stats[movie, 3] = round(m.std(),2)

# print(movie_stats)
# print(movie_stats.shape)
# print(movie_stats[:10, :])

In [None]:
print(movie_stats[0:100, 1])


[4.63 5.56 5.36 5.13 4.88 4.83 5.59 4.77 5.15 4.17 4.73 4.32 5.   4.53
 5.24 4.92 5.11 5.7  4.6  3.87 4.89 4.56 4.79 5.44 5.21 5.33 4.92 4.62
 4.29 5.53 5.24 4.6  4.57 4.7  5.53 4.53 5.11 5.09 5.23 4.96 4.9  4.67
 4.94 4.8  4.35 5.21 4.73 5.47 4.95 5.44 5.06 5.14 5.   5.1  5.3  5.02
 4.17 4.9  4.92 5.04 4.9  4.16 4.98 4.53 4.26 5.47 3.87 4.92 5.31 5.34
 5.6  5.25 4.47 4.47 4.45 4.14 4.24 4.47 4.84 4.98 5.13 5.65 5.36 4.98
 5.3  5.2  5.52 4.75 5.41 4.24 4.56 4.8  4.2  4.91 5.4  4.69 5.02 4.49
 4.25 5.28]


In [None]:
print(np.min(movie_stats, 0))
print(np.max(movie_stats, 0))

[1301.      3.68   39.      2.49]
[2350.      6.19   68.      3.63]


### Addition etc operations on arrays

In [None]:
a = np.array([[2,0],[4,9]])
b = np.array([[1,4],[3,6]])
print(a)
print(b)

[[2 0]
 [4 9]]
[[1 4]
 [3 6]]


In [None]:
print(a+b)

[[ 3  4]
 [ 7 15]]


In [None]:
np.add(a,b)

array([[ 3,  4],
       [ 7, 15]])

In [None]:
a - b

array([[ 1, -4],
       [ 1,  3]])

In [None]:
np.subtract(a,b)

array([[ 1, -4],
       [ 1,  3]])

In [None]:
print(a)
print(b)

[[2 0]
 [4 9]]
[[1 4]
 [3 6]]


In [None]:
print(a)
print(a*3)

[[2 0]
 [4 9]]
[[ 6  0]
 [12 27]]


In [None]:
print(a)
print(a/3)

[[2 0]
 [4 9]]
[[0.66666667 0.        ]
 [1.33333333 3.        ]]


In [None]:
print(a)
print(np.around(a/3,2))

[[2 0]
 [4 9]]
[[0.67 0.  ]
 [1.33 3.  ]]


### Task 5: Also convert the average movie ratings to have range from 0 to 10, such that the minimum rating converts to 0 and maximum to 10, and the other values in between

In [None]:
print(movie_stats.shape)
movie_stats[100:110, :]

(1050, 4)


array([[1401.  ,    5.07,   55.  ,    3.02],
       [1402.  ,    5.54,   57.  ,    3.01],
       [1403.  ,    4.06,   49.  ,    3.15],
       [1404.  ,    5.83,   54.  ,    3.23],
       [1405.  ,    4.71,   56.  ,    3.17],
       [1406.  ,    5.32,   56.  ,    3.12],
       [1407.  ,    4.61,   56.  ,    3.14],
       [1408.  ,    5.35,   55.  ,    3.23],
       [1409.  ,    4.92,   52.  ,    3.18],
       [1410.  ,    4.6 ,   48.  ,    2.97]])

In [9]:
print('Minmum for columns:', np.amin(movie_stats,0))
print('Maxmum for columns:', np.amax(movie_stats,0))

Minmum for columns: [1301.      3.8    44.      2.52]
Maxmum for columns: [2350.      6.13   73.      3.73]


In [None]:
# Task5 boils down to scaing the range (3.68 , 6.18) TO (0, 10)

In [6]:
ar = movie_stats[:, 1]
print('Max:', ar.max(), 'Min:', ar.min())
ar_range = ar.max() - ar.min()
print(ar_range)
or_range = 10 - 0
print(or_range)

Max: 6.13 Min: 3.8
2.33
10


In [8]:
movie_stats[0:10, :]

array([[1301.  ,    5.31,   62.  ,    3.2 ],
       [1302.  ,    5.02,   61.  ,    3.12],
       [1303.  ,    5.21,   68.  ,    3.03],
       [1304.  ,    5.07,   54.  ,    3.17],
       [1305.  ,    5.02,   65.  ,    2.96],
       [1306.  ,    5.46,   57.  ,    3.13],
       [1307.  ,    4.72,   60.  ,    2.99],
       [1308.  ,    5.2 ,   66.  ,    3.22],
       [1309.  ,    4.8 ,   56.  ,    2.87],
       [1310.  ,    5.07,   60.  ,    3.  ]])

In [11]:
movie0rating = movie_stats[0,1]
print('Movie 0 avg. rating:', movie0rating)
print('Distance from min:', movie0rating - ar.min())
startOfNewRange = 0

print('ratio of ranges:', or_range/ar_range)
newRating = (movie0rating - ar.min()) * (or_range/ar_range) + startOfNewRange
print('movie 0 new Rating:', newRating)

Movie 0 avg. rating: 5.31
Distance from min: 1.5099999999999998
ratio of ranges: 4.291845493562231
movie 0 new Rating: 6.480686695278968


In [13]:
ar = movie_stats[:, 1]
ar_range = ar.max() - ar.min()
new_range = 10 - 0
startOfNewRange = 0

nr = np.around((ar - ar.min()) * (new_range/ar_range) + startOfNewRange, 2)
nr[0:10]

array([6.48, 5.24, 6.05, 5.45, 5.24, 7.12, 3.95, 6.01, 4.29, 5.45])

In [15]:
nr.max(), nr.min(), nr.shape

(10.0, 0.0, (1050,))

In [16]:
movie_stats[0:10, :]

array([[1301.  ,    5.31,   62.  ,    3.2 ],
       [1302.  ,    5.02,   61.  ,    3.12],
       [1303.  ,    5.21,   68.  ,    3.03],
       [1304.  ,    5.07,   54.  ,    3.17],
       [1305.  ,    5.02,   65.  ,    2.96],
       [1306.  ,    5.46,   57.  ,    3.13],
       [1307.  ,    4.72,   60.  ,    2.99],
       [1308.  ,    5.2 ,   66.  ,    3.22],
       [1309.  ,    4.8 ,   56.  ,    2.87],
       [1310.  ,    5.07,   60.  ,    3.  ]])

In [22]:
nr

array([6.48, 5.24, 6.05, ..., 3.91, 6.39, 5.32])

In [36]:
movie_stats[:, 1] = nr

In [37]:
movie_stats

array([[1301.  ,    6.48,   62.  ,    3.2 ],
       [1302.  ,    5.24,   61.  ,    3.12],
       [1303.  ,    6.05,   68.  ,    3.03],
       ...,
       [2348.  ,    3.91,   55.  ,    3.23],
       [2349.  ,    6.39,   45.  ,    3.24],
       [2350.  ,    5.32,   51.  ,    2.96]])

In [39]:
movie_stats[:,1].min(), movie_stats[:, 1].max()

(0.0, 10.0)

In [18]:
a = np.array([[2,3],[5,6]])
print(a)

[[2 3]
 [5 6]]


In [20]:
a[1,1] = 8
print(a)

[[2 3]
 [5 8]]


In [21]:
a[:, 1] = np.array([11,15])
print(a)

[[ 2 11]
 [ 5 15]]


### Task 6: Display the films rating-wise, highest to lowest

In [23]:
a = np.array([27, 35, 11, 19, 81])
print('Original array:', a)

Original array: [27 35 11 19 81]


In [24]:
aSortedIndexes =  np.argsort(a)
print(aSortedIndexes)
print(a)

[2 3 0 1 4]
[27 35 11 19 81]


In [25]:
a[aSortedIndexes]

array([11, 19, 27, 35, 81])

In [26]:
newOrder = np.array([3,2,1,4,0])

In [28]:
b = a[newOrder]
print(b)

[19 11 35 81 27]


In [33]:
mr = movie_stats[:, 1]
sortedIndexes = mr.argsort()
print(mr)
print(sortedIndexes)

[6.48 5.24 6.05 ... 3.91 6.39 5.32]
[655 462 573 ... 605 217 244]


In [34]:
mr[sortedIndexes]

array([ 0.  ,  0.  ,  0.09, ...,  9.66,  9.87, 10.  ])

In [45]:
ordMovie_stats = movie_stats[sortedIndexes]
print(ordMovie_stats)

[[1956.00 0.00 55.00 2.88]
 [1763.00 0.00 56.00 3.14]
 [1874.00 0.09 61.00 3.26]
 ...
 [1906.00 9.66 59.00 3.11]
 [1518.00 9.87 60.00 3.31]
 [1545.00 10.00 62.00 3.00]]


In [49]:
np.set_printoptions(formatter={'float_kind':'{:.1f}'.format})

In [50]:
ordMovie_stats = movie_stats[sortedIndexes]
print(ordMovie_stats)

[[1956.0 0.0 55.0 2.9]
 [1763.0 0.0 56.0 3.1]
 [1874.0 0.1 61.0 3.3]
 ...
 [1906.0 9.7 59.0 3.1]
 [1518.0 9.9 60.0 3.3]
 [1545.0 10.0 62.0 3.0]]


In [58]:
print(ordMovie_stats.shape)
type(ordMovie_stats)

(1050, 4)


numpy.ndarray

In [53]:
a

array([27, 35, 11, 19, 81])

In [54]:
print(a)

[27 35 11 19 81]


In [59]:
def myprint(a):
  print('[', int(a[0,0]) ,a[0,1],a[0,2], a[0,3],']')

myprint(ordMovie_stats)

[ 1956 0.0 55.0 2.88 ]


In [60]:
mr = movie_stats[:, 1]
sortedIndexes = mr.argsort()
ordMovie_stats = movie_stats[sortedIndexes]
print(ordMovie_stats)

[[1956.0 0.0 55.0 2.9]
 [1763.0 0.0 56.0 3.1]
 [1874.0 0.1 61.0 3.3]
 ...
 [1906.0 9.7 59.0 3.1]
 [1518.0 9.9 60.0 3.3]
 [1545.0 10.0 62.0 3.0]]


In [61]:
mr = movie_stats[:, 1]
descSortedIndexes = mr.argsort()[::-1]
ordMovie_stats = movie_stats[descSortedIndexes]
print(ordMovie_stats)

[[1545.0 10.0 62.0 3.0]
 [1518.0 9.9 60.0 3.3]
 [1906.0 9.7 59.0 3.1]
 ...
 [1874.0 0.1 61.0 3.3]
 [1763.0 0.0 56.0 3.1]
 [1956.0 0.0 55.0 2.9]]


### Saving arrays to text file and Loading arrays from text file

In [65]:
np.savetxt('movieRatingsOrdered.csv', ordMovie_stats, delimiter= ';', fmt='%.1f')

In [64]:
moviesFromFile = np.genfromtxt('movieRatingsOrdered.csv', delimiter=';')
print(type(moviesFromFile))
print(moviesFromFile)

<class 'numpy.ndarray'>
[[1545.0 10.0 62.0 3.0]
 [1518.0 9.9 60.0 3.3]
 [1906.0 9.7 59.0 3.1]
 ...
 [1874.0 0.1 61.0 3.3]
 [1763.0 0.0 56.0 3.1]
 [1956.0 0.0 55.0 2.9]]
