In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
# Display defaults: matplotlib figure size, numpy print precision,
# and the truncation limits pandas uses when rendering DataFrames.
plt.rcParams["figure.figsize"] = (10, 6)
np.set_printoptions(precision=4)
pd.set_option("display.max_columns", 20)
pd.set_option("display.max_rows", 20)
pd.set_option("display.max_colwidth", 80)

## General idea

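In this notebook, we build a technology index for each educational institution (sede) and census year, defined as:

$Indice\_tecnológico_{k,t} = \frac{Equipos\_totales_{k,t}}{Total\_matricula_{k,t}}$, where $Equipos\_totales_{k,t} = Numero\_equipos\_totales_{k,t} - Numero\_equipos\_sin\_utilizar_{k,t}$, $k$ = institution (sede) and $t$ = census year (2019 or 2022). Both the numerator and the denominator are summed over all records of institution $k$ in year $t$. The index is defined at the institution level, so a student $i$ takes the value of the institution $k$ they attend.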

## Load data

In [2]:
# Technology census files (devices per school site, "sede educativa").
# The two files use different delimiters; the second is decoded as ISO-8859-1.

df1 = pd.read_csv(
    "Tenencia y número de equipos de cómputo por sede educativa.csv",
    sep=";",
)  # year 2019

df2 = pd.read_csv(
    "Tenencia y número de bienes TIC por sede educativa.CSV",
    sep=",",
    encoding="ISO-8859-1",
)  # year 2022

In [3]:
# Enrollment census files (students in básica secundaria and media, per the file names).
# The two files use different delimiters; the second is decoded as ISO-8859-1.

df3 = pd.read_csv(
    "Alumnos matrículados en básica secundaria y media según carácter y especialidad por jornada.csv",
    sep=";",
)  # year 2019

df4 = pd.read_csv(
    "Alumnos matriculados en básica secundaria y media según carácter y especialidad por jornada2.CSV",
    sep=",",
    encoding="ISO-8859-1",
)  # year 2022

## Concatenate the data

In [4]:
# Stack the 2019 and 2022 frames row-wise, renumbering the index from 0.

# Technology data (both years)
df12 = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)

# Enrollment data (both years)
df34 = pd.concat(objs=[df3, df4], axis=0, ignore_index=True)

## Technology computation

In [5]:
# Devices in use per record: total device count minus the count reported as unused
# (SEDECOM_CANTIDAD - SEDECOM_CANT_SINUSO).

df12['TOTAL_DISPOSITIVOS_TEC'] = df12['SEDECOM_CANTIDAD'].sub(df12['SEDECOM_CANT_SINUSO'])

In [6]:
# Collapse to one row per institution (SEDE_CODIGO) and year (PERIODO_ANIO),
# summing the devices in use. Note that df12 is rebound to the aggregated frame,
# so the raw per-record columns are no longer available after this cell.

df12 = df12.groupby(['SEDE_CODIGO', 'PERIODO_ANIO'], as_index=False)['TOTAL_DISPOSITIVOS_TEC'].sum()

df12

Unnamed: 0,SEDE_CODIGO,PERIODO_ANIO,TOTAL_DISPOSITIVOS_TEC
0,105001000001,2019,68
1,105001000001,2022,265
2,105001000027,2019,49
3,105001000027,2022,49
4,105001000043,2019,58
...,...,...,...
87664,499760000994,2019,12
87665,499760000994,2022,10
87666,499760001109,2019,12
87667,499760001125,2019,11


## Enrollment computation

In [7]:
# Enrollment per record: male students plus female students.
# The source files cover básica secundaria and media, so the total spans both levels.

df34["TOTAL_ESTUDIANTES"] = df34['JORNESP_CANTIDAD_HOMBRE'].add(df34['JORNESP_CANTIDAD_MUJER'])

In [8]:
# Collapse to one row per institution (SEDE_CODIGO) and year (PERIODO_ANIO),
# summing the enrolled students. Note that df34 is rebound to the aggregated frame,
# so the raw per-record columns are no longer available after this cell.

df34 = df34.groupby(['SEDE_CODIGO', 'PERIODO_ANIO'], as_index=False)['TOTAL_ESTUDIANTES'].sum()

df34

Unnamed: 0,SEDE_CODIGO,PERIODO_ANIO,TOTAL_ESTUDIANTES
0,105001000001,2019,460
1,105001000001,2022,469
2,105001000043,2019,481
3,105001000043,2022,477
4,105001000108,2019,2033
...,...,...,...
22790,499001001919,2022,448
22791,499760000994,2019,127
22792,499760000994,2022,116
22793,499760001109,2019,94


## Merging the data for the index

In [9]:
# Join devices and enrollment on institution and year.
# The inner join keeps only institution-year pairs that appear in both frames.

df = df12.merge(df34, how='inner', on=['SEDE_CODIGO', 'PERIODO_ANIO'])

In [10]:
# Technology index = devices in use per enrolled student.
# This is a plain ratio, NOT a percentage: 0.15 means 0.15 devices per student
# (multiply by 100 if a percentage is needed).
# Rows with zero enrollment have their denominator masked to NaN, so the index is NaN
# instead of +/-inf; a single inf would otherwise make any later mean of the index infinite.

df['INDICE_TECNOLOGICO'] = (
    df['TOTAL_DISPOSITIVOS_TEC']
    / df['TOTAL_ESTUDIANTES'].mask(df['TOTAL_ESTUDIANTES'].eq(0))
)

df

Unnamed: 0,SEDE_CODIGO,PERIODO_ANIO,TOTAL_DISPOSITIVOS_TEC,TOTAL_ESTUDIANTES,INDICE_TECNOLOGICO
0,105001000001,2019,68,460,0.147826
1,105001000001,2022,265,469,0.565032
2,105001000043,2019,58,481,0.120582
3,105001000043,2022,88,477,0.184486
4,105001000108,2019,105,2033,0.051648
...,...,...,...,...,...
22011,499001001919,2019,120,414,0.289855
22012,499001001919,2022,340,448,0.758929
22013,499760000994,2019,12,127,0.094488
22014,499760000994,2022,10,116,0.086207


## Analysis

In [11]:
# Year-over-year change in the total number of devices in use.
# pct_change returns a fraction (-0.17 means a 17% drop); the first year has no
# predecessor and is therefore NaN. Totals cover only the institution-years in df,
# i.e. those that survived the inner join of devices and enrollment.

sum_devices = pd.pivot_table(
    df,
    values='TOTAL_DISPOSITIVOS_TEC',
    index='PERIODO_ANIO',
    aggfunc='sum',
).pct_change()

sum_devices

Unnamed: 0_level_0,TOTAL_DISPOSITIVOS_TEC
PERIODO_ANIO,Unnamed: 1_level_1
2019,
2022,-0.173206


In [12]:
# Average technology index per year: the unweighted mean of the per-institution
# ratios, so every institution counts equally regardless of its enrollment.

avg_devices = pd.pivot_table(
    df,
    values='INDICE_TECNOLOGICO',
    index='PERIODO_ANIO',
    aggfunc='mean',
)

avg_devices

Unnamed: 0_level_0,INDICE_TECNOLOGICO
PERIODO_ANIO,Unnamed: 1_level_1
2019,0.427613
2022,0.373437


## Exporting this dataframe

In [13]:
# Export is currently disabled (the call below is commented out).
# Uncomment it to write df to "indice_tecnologico.csv" in the working directory.
# NOTE(review): to_csv also writes the row index as a first column unless index=False is passed.
# df.to_csv("indice_tecnologico.csv")