In [8]:
import findspark

# Called before the pyspark imports in the next cell.
# NOTE(review): presumably this makes the local Spark installation importable
# (e.g. via SPARK_HOME) -- confirm against the environment this notebook runs in.
findspark.init()

In [9]:

from pyspark.sql import SparkSession
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.types import DateType, TimestampType, IntegerType, FloatType, LongType, DoubleType,StringType
from pyspark.sql.types import StructType, StructField
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [10]:
# Build (or reuse) a Spark session on the "local" master with Hive support
# switched on.
spark = (
    SparkSession.builder
    .appName("Spark ML")
    .master("local")
    .enableHiveSupport()
    .getOrCreate()
)

# Handles derived from the session; later cells run their SQL through
# `sqlContext`.
sc = spark.sparkContext
sqlContext = SQLContext(sc)

In [11]:



# Spark type of each CSV column, in file order (_c0 .. _c11).
_COLUMN_TYPES = (
    StringType,   # _c0
    StringType,   # _c1
    StringType,   # _c2
    StringType,   # _c3
    StringType,   # _c4
    IntegerType,  # _c5
    StringType,   # _c6
    IntegerType,  # _c7
    IntegerType,  # _c8
    IntegerType,  # _c9
    IntegerType,  # _c10
    IntegerType,  # _c11
)

# Explicit schema handed to spark.read.csv; every field is nullable and is
# named `_c<position>` (the names are replaced right after loading).
Custom_schema = StructType([
    StructField('_c{}'.format(position), column_type(), True)
    for position, column_type in enumerate(_COLUMN_TYPES)
])




In [12]:

# Load the comma-separated file with the explicit schema; header=True marks
# the first row of the file as a header row.
df = spark.read.csv('abc.csv', header=True, schema=Custom_schema, sep=',')

# Positional schema names -> the descriptive names the SQL cells refer to.
# (The previous mid-cell `df.head()` was dropped: its result was discarded, so
# it only triggered a Spark job without showing anything.)
COLUMN_RENAMES = {
    '_c0': 'Registrar',
    '_c1': 'agency',
    '_c2': 'State',
    '_c3': 'District',
    '_c4': 'SubDistrict',
    '_c5': 'PinCode',
    '_c6': 'Gender',
    '_c7': 'Age',
    '_c8': 'generated',
    '_c9': 'rejected',
    '_c10': 'email',
    '_c11': 'mobile',
}
for positional_name, descriptive_name in COLUMN_RENAMES.items():
    df = df.withColumnRenamed(positional_name, descriptive_name)


In [13]:
# Print the column names and types of `df` after the renames above.
df.printSchema()


root
 |-- Registrar: string (nullable = true)
 |-- agency: string (nullable = true)
 |-- State: string (nullable = true)
 |-- District: string (nullable = true)
 |-- SubDistrict: string (nullable = true)
 |-- PinCode: integer (nullable = true)
 |-- Gender: string (nullable = true)
 |-- Age: integer (nullable = true)
 |-- generated: integer (nullable = true)
 |-- rejected: integer (nullable = true)
 |-- email: integer (nullable = true)
 |-- mobile: integer (nullable = true)



In [14]:
# Expose `df` to Spark SQL under the name `data`.
# createOrReplaceTempView is the supported replacement for the deprecated
# registerTempTable and can be re-run without error.
df.createOrReplaceTempView("data")

# Distinct states in alphabetical order; show up to 37 rows.
sqlContext.sql("SELECT DISTINCT State FROM data ORDER BY State ASC").show(37)

# Row counts per gender, each written as "not either of the other two codes".
# NOTE(review): presumably the only codes are 'M', 'F' and 'T', which would
# make these the male, female and transgender counts in that order -- confirm
# against the data, since any other non-null code is counted by these filters too.
sqlContext.sql("SELECT COUNT(Gender) FROM data WHERE Gender!='F' AND Gender!='T'").show()
sqlContext.sql("SELECT COUNT(Gender) FROM data WHERE Gender!='M' and Gender!='T'").show()
sqlContext.sql("SELECT COUNT(Gender) FROM data WHERE Gender!='F' AND Gender!='M'").show()


+--------------------+
|               State|
+--------------------+
|Andaman and Nicob...|
|      Andhra Pradesh|
|   Arunachal Pradesh|
|               Assam|
|               Bihar|
|          Chandigarh|
|        Chhattisgarh|
|Dadra and Nagar H...|
|       Daman and Diu|
|               Delhi|
|                 Goa|
|             Gujarat|
|             Haryana|
|    Himachal Pradesh|
|   Jammu and Kashmir|
|           Jharkhand|
|           Karnataka|
|              Kerala|
|         Lakshadweep|
|      Madhya Pradesh|
|         Maharashtra|
|             Manipur|
|           Meghalaya|
|             Mizoram|
|            Nagaland|
|              Odisha|
|              Others|
|          Puducherry|
|              Punjab|
|           Rajasthan|
|              Sikkim|
|          Tamil Nadu|
|           Telangana|
|             Tripura|
|       Uttar Pradesh|
|         Uttarakhand|
|         West Bengal|
+--------------------+

+-------------+
|count(Gender)|
+-------------+
|       

In [15]:
# Three registrars with the largest summed `generated` value, as a list of
# (Registrar, TotalAadharCount) rows.
# NOTE(review): the earlier comment here said "agencies", but the query groups
# by Registrar rather than the `agency` column -- confirm which is intended.
md = sqlContext.sql(
    "SELECT Registrar, SUM(generated) AS TotalAadharCount "
    "FROM data "
    "GROUP BY Registrar "
    "ORDER BY TotalAadharCount DESC "
    "LIMIT 3"
).collect()


In [16]:
# Ten states with the largest summed `generated` value; the collected rows are
# the cell's displayed output.
sqlContext.sql(
    "SELECT state, sum(generated) as TotalAadharCount "
    "FROM data "
    "GROUP BY State "
    "ORDER BY TotalAadharCount DESC "
    "LIMIT 10"
).collect()

[Row(state='Bihar', TotalAadharCount=162607),
 Row(state='West Bengal', TotalAadharCount=119901),
 Row(state='Uttar Pradesh', TotalAadharCount=103767),
 Row(state='Madhya Pradesh', TotalAadharCount=53276),
 Row(state='Rajasthan', TotalAadharCount=39570),
 Row(state='Gujarat', TotalAadharCount=34844),
 Row(state='Tamil Nadu', TotalAadharCount=32485),
 Row(state='Maharashtra', TotalAadharCount=26085),
 Row(state='Karnataka', TotalAadharCount=19764),
 Row(state='Odisha', TotalAadharCount=18182)]

In [18]:
# Ten states with the largest summed `generated` value, ignoring rows whose
# Gender is 'T'; kept as a list of (state, TotalAadharCount) rows.
# NOTE(review): presumably this leaves only male and female rows -- confirm
# that 'M', 'F' and 'T' are the only gender codes in the data.
md1 = sqlContext.sql(
    "SELECT state, sum(generated) as TotalAadharCount "
    "FROM data "
    "WHERE Gender!='T' "
    "GROUP BY State "
    "ORDER BY TotalAadharCount DESC "
    "LIMIT 10"
).collect()



In [19]:
# Split the (state, total) rows in `md1` into two parallel lists for plotting.
# (The former `[] * len(md1)` was just an empty list -- multiplying an empty
# list never pre-allocates anything.)
states = [row[0] for row in md1]
total_count1 = [row[1] for row in md1]
    
    


In [20]:
# Split the (registrar, total) rows in `md` into two parallel lists for plotting.
# (The former `[] * len(md)` was just an empty list -- multiplying an empty
# list never pre-allocates anything.)
aadhar_company = [row[0] for row in md]
total_count = [row[1] for row in md]
    
    
  

In [22]:
import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
# NOTE(review): this wildcard import supplies the bare names Tk, Button,
# BOTTOM, RIGHT and CENTER used in this cell. A later cell runs
# `from bokeh.models import Button`, which rebinds `Button`; prefer the
# `tk.`-qualified names when touching this code.
from tkinter import *
# Root window that hosts the chart-launcher buttons created at the end of this cell.
window = Tk()
window.title("Welcome")

def plot():
    """Open a child window with a bar chart of the top registrars.

    Reads the module-level lists ``aadhar_company`` (bar labels) and
    ``total_count`` (bar heights) at call time, draws them on a matplotlib
    ``Figure`` embedded in a new ``Toplevel`` of ``window``, and adds an
    "Exit" button that destroys only that child window.

    tkinter names are referenced through the ``tk`` module rather than the
    wildcard import, because ``Button`` is rebound by a later
    ``from bokeh.models import Button``.
    """
    window1 = tk.Toplevel(window)
    fig = Figure(figsize=(6, 6))
    a = fig.add_subplot(111)
    a.bar(aadhar_company, total_count, color=['blue', 'red', 'green'])

    # Labels describe the plotted data (registrar vs. summed `generated`)
    # instead of the former placeholder text "Estimation Grid" / "X" / "Y".
    a.set_title("Registrars with the most Aadhar generated", fontsize=16)
    a.set_ylabel("Aadhar generated", fontsize=14)
    a.set_xlabel("Registrar", fontsize=10)
    a.set_xticks(aadhar_company)

    # Embed the figure in the child window and render it.
    canvas = FigureCanvasTkAgg(fig, master=window1)
    canvas.get_tk_widget().pack()
    canvas.draw()

    button = tk.Button(window1, text="Exit", command=window1.destroy)
    button.pack(side=tk.BOTTOM, pady=10)
    
    
def piechart():
    """Open a child window with a pie chart of the top states.

    Reads the module-level lists ``states`` (slice labels) and
    ``total_count1`` (slice sizes) at call time, draws them on a matplotlib
    ``Figure`` embedded in a new ``Toplevel`` of ``window``, and adds an
    "Exit" button that destroys only that child window.

    tkinter names are referenced through the ``tk`` module rather than the
    wildcard import, because ``Button`` is rebound by a later
    ``from bokeh.models import Button``.
    """
    window2 = tk.Toplevel(window)

    # Offsets that pull the 2nd, 4th, 9th and 10th slices out of the pie.
    # pie() requires `explode` to have exactly one entry per slice, so the
    # pattern is trimmed (or zero-padded) to the number of slices present;
    # a fixed 10-tuple fails whenever fewer than ten states come back.
    explode_pattern = (0, 0.1, 0, 0.1, 0, 0, 0, 0, 0.1, 0.1)
    explode = [
        explode_pattern[i] if i < len(explode_pattern) else 0
        for i in range(len(total_count1))
    ]

    fig = Figure(figsize=(7, 7))
    ax1 = fig.add_subplot(111)
    ax1.set_title("Top 10 states with Maximum Aadhar generated\n", fontsize=10,
                  horizontalalignment='center', verticalalignment='top')
    ax1.pie(total_count1, explode=explode, labels=states, autopct='%1.1f%%',
            shadow=True, startangle=90)

    # Equal aspect ratio ensures that the pie is drawn as a circle.
    ax1.axis('equal')

    # Embed the figure in the child window and render it.
    canvas = FigureCanvasTkAgg(fig, master=window2)
    canvas.get_tk_widget().pack()
    canvas.draw()

    button = tk.Button(window2, text="Exit", height=1, width=6,
                       command=window2.destroy)
    button.pack(side=tk.BOTTOM, pady=10)



# Size and colour of the root window.
window.geometry("400x400")
window.configure(background='grey')

# Two launcher buttons packed against the right edge: 'PLOT' opens the bar
# chart window, 'pie' opens the pie chart window.
save = tk.Button(window, text='PLOT', height=1, width=6, command=plot)
save.pack(side=tk.RIGHT, pady=10)
save1 = tk.Button(window, text='pie', height=1, width=6, command=piechart)
save1.pack(side=tk.RIGHT, pady=15)

# Exit button centred in the window; destroying the root ends mainloop().
button = tk.Button(window, text="Exit", height=1, width=6,
                   command=window.destroy)
button.place(relx=0.5, rely=0.5, anchor=tk.CENTER)

# Hand control to tkinter; this call blocks until the root window is closed.
window.mainloop()

In [22]:
from bokeh.io import curdoc
from bokeh.io import output_file, show
from bokeh.palettes import Category20c
from bokeh.plotting import figure
from bokeh.transform import cumsum
from bokeh.layouts import row
from bokeh.layouts import column
from math import pi
from bokeh.models.widgets import Tabs, Panel 
from bokeh.models import Button
from bokeh.models import CheckboxGroup, RadioGroup, Toggle
from bokeh.models import NumeralTickFormatter
from bokeh.models import ColumnDataSource, CDSView, GroupFilter

In [23]:
# Each row of `md1` is a (state, total) pair, so dict() maps state -> total.
x = dict(md1)

# One row per state. NOTE: the label column is called 'country' (the tooltip
# and legend below refer to it by that name) although it holds state names.
data = pd.Series(x).reset_index(name='value').rename(columns={'index':'country'})

# Share of the whole for every state, as a wedge angle (radians) and a percentage.
total_generated = data['value'].sum()
data['angle'] = data['value'] / total_generated * 2*pi
data['percent'] = data['value'] / total_generated * 100

# Category20c only provides palettes for 3..20 colours, so indexing it with
# fewer than three slices raises KeyError. Take the smallest palette that is
# large enough and trim it to one colour per slice.
n_slices = len(x)
data['color'] = list(Category20c[max(n_slices, 3)][:n_slices])

p = figure(plot_height=350, title="Pie Chart", toolbar_location=None,
           tools="hover", tooltips="@country: @value", x_range=(-0.5, 1.0))

# Wedges are laid end to end: each one starts where the running sum of the
# previous angles stops.
# NOTE(review): newer Bokeh releases spell the legend argument `legend_field=`;
# confirm against the installed Bokeh version before changing it.
p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend='country', source=data)

# Bar chart: one bar per entry of `aadhar_company`, with heights from `total_count`.
# NOTE(review): the y axis is fixed at 0..1000000 -- confirm that no total
# exceeds this upper bound.
p1 = figure(
    title="Agencies with max no of registrations",
    x_range=aadhar_company,
    y_range=[0,1000000],
    plot_height=250,
    tools="",
    toolbar_location=None,
)
p1.vbar(x=aadhar_company, top=total_count, width=0.8)

# Cosmetics: start the y axis at zero, drop vertical grid lines, and format
# the y tick labels with the "00" numeral pattern.
p1.y_range.start = 0
p1.xgrid.grid_line_color = None
p1.yaxis.formatter = NumeralTickFormatter(format="00")



# Strip axes and grid lines from the pie figure.
p.grid.grid_line_color = None
p.axis.visible = False
p.axis.axis_label = None

# Widgets shown under the pie chart; no callbacks are attached to them here.
toggle = Toggle(button_type='success', label='Toggle button')
checkbox = CheckboxGroup(labels=['Option 1', 'Option 2', 'Option 3'])

# Tab 1 stacks the pie chart above the two widgets; tab 2 holds the bar chart.
first = Panel(title='Top 10 states', child=column(p, toggle, checkbox))
second = Panel(title='second', child=p1)
tabs = Tabs(tabs=[first, second])

# Target file for the rendered layout.
output_file('tabbed.html')

# Register the tabbed layout with the current document, then render it.
curdoc().add_root(tabs)
show(tabs)
