# Spark code to find the minimum temperature recorded by each weather station

In [1]:
from pyspark import SparkConf, SparkContext
import collections
import time

import matplotlib.pyplot as plt

## Setup

In [2]:
# Run Spark locally. The app name describes what this notebook actually
# computes (the minimum temperature per weather station).
conf = SparkConf().setMaster('local').setAppName('MinTemperatures')
# getOrCreate lets this cell be re-run: constructing a second SparkContext
# in the same process raises an error, whereas getOrCreate hands back the
# context that is already running (in which case `conf` is not re-applied).
sc = SparkContext.getOrCreate(conf)

## Processing files

In [10]:
def parseLine(line):
    """Turn one comma-separated record into a (station, type, value) tuple.

    The first column is the station ID, the third is the entry type, and
    the fourth is a numeric reading that is divided by 10 before being
    returned (presumably tenths of a degree Celsius -- confirm against the
    data set's documentation).

    Raises IndexError if the line has fewer than four columns and
    ValueError if the fourth column is not a number.
    """
    columns = line.split(',')
    station, kind, raw_value = columns[0], columns[2], columns[3]
    return (station, kind, float(raw_value) / 10)

In [11]:
# Read the CSV as an RDD of raw text lines, then turn each line into a
# (stationID, entryType, temperature) tuple via parseLine.
lines = sc.textFile('data/1800.csv')
rdd = lines.map(parseLine)

In [13]:
# Peek at the first ten parsed records.
rdd.take(10)

[('ITE00100554', 'TMAX', -7.5),
 ('ITE00100554', 'TMIN', -14.8),
 ('GM000010962', 'PRCP', 0.0),
 ('EZE00100082', 'TMAX', -8.6),
 ('EZE00100082', 'TMIN', -13.5),
 ('ITE00100554', 'TMAX', -6.0),
 ('ITE00100554', 'TMIN', -12.5),
 ('GM000010962', 'PRCP', 0.0),
 ('EZE00100082', 'TMAX', -4.4),
 ('EZE00100082', 'TMIN', -13.0)]

### Keeping only the TMIN entries

In [14]:
# Keep only the minimum-temperature records. The entry type is compared for
# equality: a substring test would also accept any other entry type whose
# name merely contains "TMIN".
minTemps = rdd.filter(lambda x: x[1] == "TMIN")

In [22]:
# Preview the first ten TMIN records. take(10) avoids collecting the whole
# RDD onto the driver only to throw away everything past the tenth element.
minTemps.take(10)

[('ITE00100554', 'TMIN', -14.8),
 ('EZE00100082', 'TMIN', -13.5),
 ('ITE00100554', 'TMIN', -12.5),
 ('EZE00100082', 'TMIN', -13.0),
 ('ITE00100554', 'TMIN', -4.6),
 ('EZE00100082', 'TMIN', -7.3),
 ('ITE00100554', 'TMIN', -1.3),
 ('EZE00100082', 'TMIN', -7.4),
 ('ITE00100554', 'TMIN', -0.6),
 ('EZE00100082', 'TMIN', -5.8)]

In [27]:
# Drop the entry type, leaving (stationID, temperature) key/value pairs.
stationTemps = minTemps.map(lambda record: (record[0], record[2]))

In [29]:
# Preview the first ten (stationID, temperature) pairs. take(10) avoids
# collecting the whole RDD onto the driver just to slice off ten elements.
stationTemps.take(10)

[('ITE00100554', -14.8),
 ('EZE00100082', -13.5),
 ('ITE00100554', -12.5),
 ('EZE00100082', -13.0),
 ('ITE00100554', -4.6),
 ('EZE00100082', -7.3),
 ('ITE00100554', -1.3),
 ('EZE00100082', -7.4),
 ('ITE00100554', -0.6),
 ('EZE00100082', -5.8)]

In [30]:
# Fold the readings of each station down to the smallest one.
minTempsPerStation = stationTemps.reduceByKey(min)

In [31]:
# Preview up to ten per-station minimums. take(10) fetches at most ten
# elements instead of collecting everything and slicing afterwards.
minTempsPerStation.take(10)

[('ITE00100554', -14.8), ('EZE00100082', -13.5)]

In [32]:
# Bring the per-station minimums back to the driver as a Python list.
results = minTempsPerStation.collect()

In [36]:
# Print one "station <tab> temperature" line per station.
for station, temperature in results:
    print(f"{station} \t{temperature}C")

ITE00100554 	-14.8C
EZE00100082 	-13.5C
