In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Import all necessary libraries for data visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
warnings.filterwarnings(action='ignore')

### DataFrame overview

In [None]:
# Load the schools CSV from the Kaggle input directory and preview the first rows.
schools_csv_path = '../input/cbse-schools-data/schools_detailed.csv'
df = pd.read_csv(schools_csv_path)
df.head()

### Keep only Hyderabad-based schools (Hyderabad and Rangareddy districts)

In [None]:
# Keep only the rows whose district is one of the two listed below,
# then show how many rows and columns remain.
hyderabad_districts = ['HYDERABAD', 'RANGAREDDY']
in_hyderabad = df['district'].isin(hyderabad_districts)
hyderabad_df = df[in_hyderabad]
hyderabad_df.shape

In [None]:
# Preview the first five rows of the filtered frame.
hyderabad_df.head(n=5)

### Getting all columns in the dataframe

In [None]:
# Show every column name of the full dataset as one comma-separated string.
column_names = list(df.columns)
", ".join(column_names)

### Group columns

In [None]:
# Every group starts with the same two identifying columns.
_id_cols = ['name', 'aff_no']

# Building blocks for the pattern-named columns.
_grades = ['i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x', 'xi', 'xii']
_rooms = ['composite_lab', 'phy_lab', 'chem_lab', 'bio_lab', 'biotech_lab',
          'math_lab', 'cs_lab', 'home_lab', 'library', 'other_lab']
_room_dims = ['no', 'length', 'breadth']

school_addresses = _id_cols + [
    'state', 'district', 'region', 'address', 'pincode',
    'ph_no', 'off_ph_no', 'res_ph_no', 'fax_no', 'email', 'website',
]
school_foundation = _id_cols + ['year_found', 'date_opened']
school_principal = _id_cols + ['princi_qual', 'princi_exp_adm', 'princi_exp_teach']
school_aff_details = _id_cols + ['aff_type', 'aff_start', 'aff_end']
school_details = _id_cols + ['status', 'n_medium', 'n_school_type']

# l_nearest_<place> followed by l_nearest_<place>_dist for each place.
school_surroundings = _id_cols + [
    f'l_nearest_{place}{suffix}'
    for place in ('railway', 'police', 'bank')
    for suffix in ('', '_dist')
]

# Combined XI/XII student count first, then sections/students per grade.
school_sections = _id_cols + ['e_xi_xii_students'] + [
    f'e_{grade}_{kind}'
    for grade in _grades
    for kind in ('sections', 'students')
]

school_class_dimensions = _id_cols + [f'i_classrooms_{dim}' for dim in _room_dims]

# Count, length and breadth for every lab type and the library.
school_labs = _id_cols + [
    f'i_{room}_{dim}'
    for room in _rooms
    for dim in _room_dims
]

school_dimensions = _id_cols + [
    'p_area_meter', 'p_area_acre', 'p_area_builtup_meter',
    'p_num_sites', 'p_area_playground',
]
school_amenities = _id_cols + [
    f'f_{amenity}'
    for amenity in ('swimming_pool', 'indoor_games', 'dance_rooms', 'gym',
                    'music_rooms', 'hostel', 'health_checkup')
]

### Create one DataFrame per column group

In [None]:
# Build one frame per column group from the Hyderabad subset.
# Each selection is explicitly copied: several of these frames get columns
# added or overwritten in later cells, and an independent copy keeps those
# writes from being chained assignments on a selection of hyderabad_df.
school_addresses_df = hyderabad_df[school_addresses].copy()
school_foundation_df = hyderabad_df[school_foundation].copy()
school_principal_df = hyderabad_df[school_principal].copy()
school_aff_details_df = hyderabad_df[school_aff_details].copy()
school_details_df = hyderabad_df[school_details].copy()
school_surroundings_df = hyderabad_df[school_surroundings].copy()
school_sections_df = hyderabad_df[school_sections].copy()
school_class_dimensions_df = hyderabad_df[school_class_dimensions].copy()
school_labs_df = hyderabad_df[school_labs].copy()
school_dimensions_df = hyderabad_df[school_dimensions].copy()
school_amenities_df = hyderabad_df[school_amenities].copy()

### Total Number of schools with swimming pool

In [None]:
# Count schools per value of the swimming-pool flag.
# The column is passed by keyword: current seaborn treats the first
# positional argument as `data`, so a bare Series is not read as `x`.
ax = sns.countplot(data=school_amenities_df, x='f_swimming_pool')
ax.set(xlabel='Swimming pool', ylabel='Number of schools');

### Find the schools nearest to Gachibowli/HiTech City, the IT hub (pincode difference is used as a rough proxy for distance)

In [None]:
# Pincode subtracted from every school's pincode below.
GACHIBOWLI_PINCODE = 500032

# Missing pincodes become 0 so the column can be cast to int.
pincode_as_int = school_addresses_df['pincode'].fillna(0).astype(int)

# Rebuild the frame with `assign` instead of mutating columns in place:
# `df[col].fillna(..., inplace=True)` on a derived frame is a chained
# assignment that does not reliably (and under copy-on-write never) update
# the frame itself.
school_addresses_df = school_addresses_df.assign(
    pincode=pincode_as_int,
    # Absolute pincode difference, used as the distance measure in later cells.
    distance_from_gachibowli=(pincode_as_int - GACHIBOWLI_PINCODE).abs(),
)
school_addresses_df.sort_values('distance_from_gachibowli', ascending=True).head(5)

### Get schools with all amenities, probably useful when both parents are working professionals

In [None]:
# Keep only the schools where every amenity flag equals 'yes'.
amenity_flags = [
    'f_swimming_pool',
    'f_indoor_games',
    'f_dance_rooms',
    'f_gym',
    'f_music_rooms',
    'f_hostel',
    'f_health_checkup',
]
has_every_amenity = school_amenities_df[amenity_flags].isin(['yes']).all(axis=1)
school_amenities_df[has_every_amenity]

### Playground area of schools near Gachibowli

In [None]:
plt.figure(figsize=(6,4))

# Attach address, pincode and distance to the dimension columns by row index,
# keep schools with a positive playground area, and order them by
# distance (ascending) and then playground area (descending).
address_columns = school_addresses_df[['address','pincode','distance_from_gachibowli']]
schools_with_playground_area_df = (
    pd.merge(school_dimensions_df, address_columns, left_index=True, right_index=True)
    .query("p_area_playground > 0")
    .sort_values(['distance_from_gachibowli','p_area_playground'], ascending=[True,False])
)

# x and y must be passed by keyword: current seaborn no longer accepts them
# positionally and raises a TypeError.
sns.scatterplot(
    x='distance_from_gachibowli',
    y='p_area_playground',
    data=schools_with_playground_area_df.query("distance_from_gachibowli < 20"),
)
plt.xlabel("Distance from Gachibowli")
plt.ylabel("Schools with large playground area")

### Schools with good playground area with amenities

In [None]:
# Join the nearby schools that have a playground with their amenity flags,
# matching rows on school name and affiliation number.
join_keys = ['name','aff_no']
nearby_playground_df = schools_with_playground_area_df.query("distance_from_gachibowli < 20")
school_playground_amenities_df = nearby_playground_df.merge(school_amenities_df, on=join_keys)
school_playground_amenities_df.head()

### Scatter plot showing schools with and without swimming pool

In [None]:
# Playground area against distance, coloured by the swimming-pool flag;
# marker size also encodes the playground area.
nearby_schools = school_playground_amenities_df.query("distance_from_gachibowli < 20")
axis_labels = {
    "distance_from_gachibowli": "Distance relative to Gachibowli",
    "p_area_playground": "Playground Area",
}
fig = px.scatter(
    nearby_schools,
    x='distance_from_gachibowli',
    y='p_area_playground',
    size='p_area_playground',
    hover_name='name',
    color='f_swimming_pool',
    labels=axis_labels,
    width=600,
    height=400,
    title="Relationship of distance and playground area with swimming pool",
)
fig.show()

### Scatter plot showing schools with and without hostel facility

In [None]:
# Playground area against distance, coloured by the hostel flag;
# the address is shown on hover and marker size encodes the playground area.
nearby_schools = school_playground_amenities_df.query("distance_from_gachibowli < 20")
axis_labels = {
    "distance_from_gachibowli": "Distance relative to Gachibowli",
    "p_area_playground": "Playground Area",
}
fig = px.scatter(
    nearby_schools,
    x='distance_from_gachibowli',
    y='p_area_playground',
    size='p_area_playground',
    hover_name='name',
    hover_data=['address'],
    color='f_hostel',
    labels=axis_labels,
    width=600,
    height=400,
    title="Relationship of distance and playground area with hostel facility",
)
fig.show()

### School types

In [None]:
# Histogram of the school type column.
school_types = school_details_df['n_school_type']
px.histogram(school_types, width=600, height=400)

### Get all Government and Govt Aided schools

In [None]:
# Rows whose school type is 'Govt' or 'Govt Aided'.
government_types = ['Govt','Govt Aided']
is_government = school_details_df['n_school_type'].isin(government_types)
school_details_df[is_government]

### Distribution of School status

In [None]:
# Histogram of the school status column.
school_status = school_details_df['status']
px.histogram(school_status, width=600, height=400)

In [None]:
# Computer science lab area = length * breadth.
# `assign` returns a new frame, so the column is not written into a frame
# that was itself derived by column selection (chained assignment), and
# re-running the cell gives the same result.
school_labs_df = school_labs_df.assign(
    i_cs_lab_area=school_labs_df['i_cs_lab_length'] * school_labs_df['i_cs_lab_breadth']
)

In [None]:
# Histogram of the computed computer science lab area.
cs_lab_areas = school_labs_df['i_cs_lab_area']
px.histogram(cs_lab_areas, width=600, height=400)

In [None]:
# Join the addresses of nearby schools with their lab columns,
# matching rows on school name and affiliation number.
join_keys = ['name','aff_no']
nearby_addresses_df = school_addresses_df.query("distance_from_gachibowli < 20")
school_lab_size_df = nearby_addresses_df.merge(school_labs_df, on=join_keys)
school_lab_size_df.head()

### Replace all NaN values with 0

In [None]:
# Replace missing lab areas with 0.
# `df[col].fillna(..., inplace=True)` is a chained assignment that is a no-op
# under pandas copy-on-write, so the column is rebuilt with `assign` instead.
# Note: school_lab_size_df was merged before this cell and is not affected.
school_labs_df = school_labs_df.assign(
    i_cs_lab_area=school_labs_df['i_cs_lab_area'].fillna(0)
)

### Find all the schools with a good Computer Science Lab

In [None]:
# Computer science lab area against distance for the schools that have a
# lab area value; marker size encodes the lab area.
schools_with_cs_lab_area = school_lab_size_df.dropna(subset=['i_cs_lab_area'])
axis_labels = {
    "distance_from_gachibowli": "Distance relative to Gachibowli",
    "i_cs_lab_area": "Computer Science Lab Area",
}
fig = px.scatter(
    schools_with_cs_lab_area,
    x='distance_from_gachibowli',
    y='i_cs_lab_area',
    size='i_cs_lab_area',
    hover_name='name',
    hover_data=['address'],
    labels=axis_labels,
    width=600,
    height=400,
    title="Relationship of distance and computer science lab area",
)
fig.show()

### Find the class area

In [None]:
# Classroom area = length * breadth.
# `assign` returns a new frame, so the column is not written into a frame
# that was itself derived by column selection (chained assignment), and
# re-running the cell gives the same result.
school_class_dimensions_df = school_class_dimensions_df.assign(
    i_classrooms_area=(
        school_class_dimensions_df['i_classrooms_length']
        * school_class_dimensions_df['i_classrooms_breadth']
    )
)
school_class_dimensions_df.head()

### Schools with good classroom area

In [None]:
# Classroom length against breadth for the schools that have an area value;
# marker size encodes the classroom area.
classrooms_with_area = school_class_dimensions_df.dropna(subset=['i_classrooms_area'])
axis_labels = {
    'i_classrooms_length': 'Class Length',
    'i_classrooms_breadth': 'Class Breadth',
}
px.scatter(
    classrooms_with_area,
    x='i_classrooms_length',
    y='i_classrooms_breadth',
    hover_name='name',
    size='i_classrooms_area',
    labels=axis_labels,
    width=600,
    height=400,
    title="Size of the class room",
)