## AnkiPy

Python library for creating and managing Anki decks, using an Excel file as the card database.

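IMPORTANT (images): the image files referenced by the cards must be copied into the `collection.media` folder of your Anki profile. For example, if your Windows username is "Juan" and your Anki profile is named "Estudios", the folder to copy the images into is: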

C:\Users\Juan\AppData\Roaming\Anki2\Estudios\collection.media


Author: Mario Mañana     
Version log:   

24/03/2025 Documentation of the project.
02/01/2025 Include the tab 'rebt'.
03/10/2024 Minor updates for increasing the functionality.   
31/08/2024 Include a setting option for Electrical Machines and Drives.    
28/08/2024 Fully functional code for importing both Circuit Theory and UC24 databases.    


Packages to be included in the project.

In [1]:
import pandas as pd
import os
import re

### Flags    

Parameters that must be configured before running the code.

In [2]:
# flags
# Set target:
#   1.- Teaching innovation project – Circuit Theory
#   2.- UC24
#   3.- Teaching innovation project – Electrical Machines and Drives
use = 1  # subject G875

# Subject, i.e. 'uc24' for UC24 or 'Gxxx' for a course code.
# IMPORTANT: it must match the selected Excel file, because it is also read as
# a column name of every sheet (see `columns` below).
asignatura = 'G822'  # previously used: 'G861', 'G875', 'G990', 'G589-G620'

# True: the output CSV is named after <asignatura>.
# False: the default name of the selected target (csv_out) is used.
baggregated = True

# NOTE(review): this flag is not read by any of the visible cells — confirm
# whether it is still needed.
boverwrite = False

if use == 1:  # Teaching innovation. Circuit Theory
    path = 'E:\\mario\\trabajos2\\innovación_docente_2024\\ankipy\\'
    excel_file = 'uc_tc.xlsx'
    csv_out = 'uc_tc.csv'  # default name
    sheet_names = ['intro', 'ca', 'resolucion', 'trifasica', 'maquinas', 'transitorio', 'cuadripolos', 'bobinas', 'filtros', 'rebt']
    #sheet_names = ['intro', 'ca']
elif use == 2:  # uc24
    path = 'E:\\mario\\UC24\\documentacion\\'
    excel_file = 'uc24_anki.xlsx'
    csv_out = 'uc24.csv'
    sheet_names = ['eadmi', 'presupuesto', 'infraestructuras', 'LOSU', 'academico', 'CifrasUC']  # Name of the sheets to read
    #sheet_names = ['eadmi']  # Name of the sheets to read
elif use == 3:  # Teaching innovation. Electrical machines and drives
    path = 'E:\\mario\\trabajos2\\innovación_docente_2024\\ankipy\\'
    excel_file = 'uc_mae.xlsx'
    csv_out = 'uc_mae.csv'  # default name
    sheet_names = ['intro', 'mecanica', 'convertidores', 'ca', 'cc', 'reluctancia']
else:
    # Stop here: without a valid target none of path / excel_file / csv_out /
    # sheet_names exist, and the lines below would fail with a NameError.
    raise ValueError(f'Set target: error. Unknown value: {use!r} (expected 1, 2 or 3)')


# Columns to read from every sheet. The last one is the per-subject column
# named after <asignatura>.
columns = ['slide', 'pregunta', 'imagen_frontal', 'respuesta', 'imagen_respuesta', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10', 'tags', 'deck', asignatura]

# Sub-deck names for target 2 (UC24), keyed by the value found in the
# <asignatura> column.
subdesk_uc24 = {
    "1": "eAdministracion",
    "2": "Presupuesto",
    "3": "Infraestructuras",
    "4": "LOSU",
    "5": "Academico",
    "6": "CifrasUC"
}

# Sub-deck names for target 3 (Electrical Machines and Drives).
subdesk_mae = {
    "1": "Introduction",
    "2": "Mechanical requirements",
    "3": "Power converters",
    "4": "ac drives",
    "5": "dc drives",
    "6": "Switched-reluctance drives"
}


csv_aggregated = asignatura + '.csv'

# `path` already ends with a separator, so plain concatenation is enough.
full_excel_path = path + excel_file
print("******************************************")
print(" Input File")
print( full_excel_path)
print("  ")


if baggregated:
    full_csv_output = path + csv_aggregated
else:
    # No sheet has been selected at this point, so a per-sheet file name
    # cannot be built here; use the default name of the target instead.
    full_csv_output = path + csv_out
print('Output file: ' + full_csv_output)

******************************************
 Input File
E:\mario\trabajos2\innovación_docente_2024\ankipy\uc_tc.xlsx
  
Output file: E:\mario\trabajos2\innovación_docente_2024\ankipy\G822.csv


In [3]:
# File header read by Anki's text importer.
# More information: https://docs.ankiweb.net/importing/text-files.html
contenido = """#deck column:17
#separator:Semicolon
#notetype:Basic_Image_several_answers
#columns:slide;pregunta;imagen_frontal;respuesta;imagen_respuesta;r1;r2;r3;r4;r5;r6;r7;r8;r9;r10;tags;deck
#tags column:16
#html:true
"""


def build_deck_name(value):
    """Return the deck path ('<asignatura>::<sub-deck>') for one card.

    value: content of the card's <asignatura> cell.
    Raises ValueError when `use` is not one of the supported targets, and
    KeyError when `use` is 2 and value is not a key of subdesk_uc24.
    """
    if use == 1:  # Circuit Theory
        return asignatura + '::' + 'Tema ' + str(value)
    if use == 2:  # UC24
        return asignatura + '::' + subdesk_uc24[value]
    if use == 3:  # Electrical Machines and Drives
        return asignatura + '::' + str(value)
    raise ValueError(f'Set target: Unknown value: {use!r}')


# Write (overwrite if it already exists) the file. The encoding is explicit so
# the header matches the UTF-8 rows that to_csv appends below.
with open(full_csv_output, "w", encoding="utf-8") as archivo:
    archivo.write(contenido)

# Kept for compatibility: the original cell created this (empty) frame.
df_summary = pd.DataFrame(columns=['Subdesk', 'NCards'])

# One filtered frame per sheet; they are concatenated after the loop.
frames = []

for sheet in sheet_names:
    # NOTE(review): usecols selects columns but does not reorder them, so the
    # header above relies on the sheet storing its columns in the order
    # slide ... tags, deck — confirm against the Excel files.
    df = pd.read_excel(full_excel_path, sheet_name=sheet, usecols=columns, header=0, dtype=str)
    df.columns = df.columns.astype(str)

    # Keep only the rows whose <asignatura> cell is filled in. The copy makes
    # df2 independent from df, so the assignments below do not write into a
    # slice of df.
    df2 = df[df[asignatura].notna()].copy()

    # Prefix the slide number with the sheet name and build the deck path.
    df2['slide'] = [sheet + '.' + str(slide) for slide in df2['slide']]
    df2['deck'] = [build_deck_name(value) for value in df2[asignatura]]

    # Remove the <asignatura> column once the dataframe has been filtered.
    dfs = df2.drop(columns=[asignatura])
    dfs.to_csv(full_csv_output, sep=';', index=False, na_rep='  ', mode='a', header=False, encoding='utf-8')

    frames.append(dfs)

# Aggregate all sheets in a single dataframe (empty when no sheet was read).
if frames:
    dfall = pd.concat(frames, ignore_index=True)
else:
    dfall = pd.DataFrame(columns=['deck'])

# Number of cards per deck.
conteo_valores = dfall['deck'].value_counts().reset_index()
# Rename columns for better description
conteo_valores.columns = ['Subdesk', 'NCards']

# Determine the column widths based on the longest values. The defaults only
# apply when there are no cards at all, where max() would otherwise raise.
ancho_subdesk = max((len(subdesk) for subdesk in conteo_valores['Subdesk']), default=len('Subdesk')) + 10  # Column width 'Subdesk'
ancho_cards = max((len(str(card)) for card in conteo_valores['NCards']), default=len('Cards')) + 5  # Column width 'Cards'

print(f"{'Subdesk':<{ancho_subdesk}}{'Cards':<{ancho_cards}}")
print('-' * (ancho_subdesk + ancho_cards + 3))

for subdesk, cards in zip(conteo_valores['Subdesk'], conteo_valores['NCards']):
    print(f"{subdesk:<{ancho_subdesk}}{cards:<{ancho_cards}}")

Subdesk               Cards  
--------------------------------
G822::Tema 4          34     
G822::Tema 2          33     
G822::Tema 3          30     


In [4]:
# Matches img src="..." as well as img src='...'. Exactly one of the two
# capturing groups is filled in, depending on the quote style.
_IMG_SRC_RE = re.compile(r"""img src=(?:"([^"]+)"|'([^']+)')""")


def extract_filenames(text):
    """Return the filename of the first image referenced in *text*.

    The filename is the quoted value that follows 'img src='; both double
    and single quotes are accepted.

    Parameters:
        text: cell content to scan. Non-string values (e.g. NaN from an
            empty cell) are treated as containing no image.

    Returns:
        The filename as a string, or "" when *text* has no image reference.
    """
    if not isinstance(text, str):
        return ""

    match = _IMG_SRC_RE.search(text)
    if match is None:
        return ""

    # Only the group of the quote style that matched is non-empty.
    return match.group(1) or match.group(2)




In [5]:
# Demo: extracting a filename when the quotes around it are doubled.
text = '<img src=""intro_bob_eq_par_sol_RM.png"">'

# Same expression as before, but compiled first: the filename sits between
# two pairs of double quotes.
doubled_quote_pattern = re.compile(r'img src=""([^"]+)""')
filenames = doubled_quote_pattern.findall(text)

# Show the results
print(filenames)


['intro_bob_eq_par_sol_RM.png']


In [6]:
# Echo the path of the generated .csv file and keep it as the file to read back.
print(full_csv_output)
file_path = full_csv_output


E:\mario\trabajos2\innovación_docente_2024\ankipy\G822.csv


In [7]:
# Number of leading lines to skip when reading the file (for example, 6 lines).
n_lineas_a_ignorar = 6

# Load the remaining lines as semicolon-separated data without a header row.
df = pd.read_csv(file_path, sep=';', header=None, skiprows=n_lineas_a_ignorar)



In [8]:
# Display the DataFrame read from the CSV file for visual inspection.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,intro.1,Unidad de la potencia activa,,"vatio (en inglés watt), expresado con el símbo...",,,,,,,,,,,,#intro,G822::Tema 2
1,intro.2,Unidad de la potencia reactiva,,voltiamperio reactivo (en inglés reactive volt...,,,,,,,,,,,,#intro,G822::Tema 2
2,ca.1,¿A qué frecuencia se denomina <b>de marcha ind...,,50 Hz,,,,,,,,,,,,#ca,G822::Tema 2
3,ca.2,¿Cuál es el valor eficaz de la siguiente señal...,,230 V,,,,,,,,,,,,#ca,G822::Tema 2
4,ca.3,¿Cuál es el valor máximo o de pico de la sigui...,,325.3 V,,,,,,,,,,,,#ca,G822::Tema 2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,maquinas.30,¿A qué se deben principalemente las pérdidas e...,,Se deben a las pérdidas por efecto Joule en el...,,,,,,,,,,,,#maquinas,G822::Tema 4
93,maquinas.31,¿En qué parte de una máquina asíncrona aparece...,,En el estator de la máquina,,,,,,,,,,,,#maquinas,G822::Tema 4
94,maquinas.32,"¿Qué ecuación relaciona la potencia mécanica, ...",,La potencia es directamente proporcional al pa...,,,,,,,,,,,,#maquinas,G822::Tema 4
95,maquinas.33,¿Cómo se realiza una conexión en estrella en u...,,,"<img src=""conexion_estrella.PNG"">",,,,,,,,,,,#maquinas,G822::Tema 4


In [9]:
# Scan every cell of the DataFrame, column by column, and collect the image
# filename found in each string cell.
all_filenames = []

for _, column_values in df.items():
    for cell in column_values:
        # Only string cells are scanned.
        if not isinstance(cell, str):
            continue
        filenames = extract_filenames(cell)
        # Results of two characters or fewer (including "" for no match) are
        # discarded.
        if len(filenames) <= 2:
            continue
        print(filenames)
        all_filenames.append(filenames)

ca_potencia_res_puro_rm.png
ca_potencia_ind_puro_rm.png
aag_analizador.png
aag_analizador.png
aag_aron.png
aag_reactiva.png
flujo_alterna.PNG
flujo_continua.PNG
ensayo_vacio.PNG
ensayo_cortocircuito.PNG
colector_delgas.PNG
dc_serie.PNG
variacion_velocidad_mcc.PNG
conexion_estrella.PNG
conexion_triangulo.PNG


In [10]:
# Display the list of collected image filenames.
all_filenames

['ca_potencia_res_puro_rm.png',
 'ca_potencia_ind_puro_rm.png',
 'aag_analizador.png',
 'aag_analizador.png',
 'aag_aron.png',
 'aag_reactiva.png',
 'flujo_alterna.PNG',
 'flujo_continua.PNG',
 'ensayo_vacio.PNG',
 'ensayo_cortocircuito.PNG',
 'colector_delgas.PNG',
 'dc_serie.PNG',
 'variacion_velocidad_mcc.PNG',
 'conexion_estrella.PNG',
 'conexion_triangulo.PNG']

In [11]:
# Rename every entry of the directory below to its lowercase form.

a = 1  # 0: leave the names untouched; 1: rename

if a == 1:

    path = 'E:\\mario\\trabajos2\\innovación_docente_2024\\ankipy\\uc_tc\\'

    # Walk through all entries of the specified directory
    for entry in os.listdir(path):
        lowered = entry.lower()

        # Nothing to do when the name is already its own lowercase form.
        if entry == lowered:
            print(f'The file {entry} is already in lower case.')
            continue

        os.rename(os.path.join(path, entry), os.path.join(path, lowered))
        print(f'Rename: {entry} -> {lowered}')

The file aag_acometida.png is already in lower case.
The file aag_analizador.png is already in lower case.
The file aag_aron.png is already in lower case.
The file aag_caja_gral_proteccion.png is already in lower case.
The file aag_circ_doble_sintonizado.png is already in lower case.
The file aag_circ_paso_banda_paralelo.png is already in lower case.
The file aag_circ_paso_banda_serie.png is already in lower case.
The file aag_circ_rechaza_banda_paralelo.png is already in lower case.
The file aag_circ_rechaza_banda_serie.png is already in lower case.
The file aag_contador.png is already in lower case.
The file aag_cuarto_contadores.png is already in lower case.
The file aag_c_paralelo.png is already in lower case.
The file aag_c_serie.png is already in lower case.
The file aag_c_triangulo.png is already in lower case.
The file aag_derivacion_individual.png is already in lower case.
The file aag_diferencial.png is already in lower case.
The file aag_dispositivo_gral_mando.png is already