In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
import datetime

from kiblib.utils.db import DbConn
from kiblib.utils.code2libelle import Code2Libelle
from kiblib.adherent import Adherent
from kiblib.pret import Pret
from kiblib.document import Document

In [2]:
db_conn = DbConn().create_engine()

year = 2024
#date_extraction_usagers = '2023-12-27'
date_extraction_usagers = '2024-12-25'

## C2 - Catalogue

### Notices

In [3]:
query = f"""SELECT COUNT(bm.biblionumber)
    FROM koha{year}.biblio_metadata bm"""
r = pd.read_sql(query, db_conn)
r

KeyboardInterrupt: 

### Notices créées dans l'année

In [None]:
query = f"""SELECT COUNT(bm.biblionumber)
    FROM koha{year}.biblio_metadata bm
    JOIN koha{year}.biblio b ON b.biblionumber = bm.biblionumber 
    WHERE YEAR(b.datecreated) = '{year}'"""
r = pd.read_sql(query, db_conn)
r

### Notices créées dans l'année "à la main"

In [None]:
query = f"""SELECT COUNT(bm.biblionumber)
    FROM koha{year}.biblio_metadata bm
    JOIN koha{year}.biblio b ON b.biblionumber = bm.biblionumber 
    WHERE YEAR(b.datecreated) = '{year}'
    AND ExtractValue( bm.metadata, '//datafield[@tag="801"]/subfield[@code="b"]' ) = '595126101'"""
r = pd.read_sql(query, db_conn)
r

### Nombre de notices entrées par importation dans l'année

In [None]:
# Nombre de notices entrées dans l'année - nombre de notices entrées à la main
query = f"""SELECT COUNT(bm.biblionumber)
    FROM koha{year}.biblio_metadata bm
    JOIN koha{year}.biblio b ON b.biblionumber = bm.biblionumber 
    WHERE YEAR(b.datecreated) = '{year}'
    AND ExtractValue( bm.metadata, '//datafield[@tag="801"]/subfield[@code="b"]' ) != '595126101'"""
r = pd.read_sql(query, db_conn)
r

# D - Collections

In [None]:
query = f"""SELECT
                    i.itemnumber,
                    i.barcode,
                    i.dateaccessioned,
                    i.price,
                    i.homebranch,
                    i.holdingbranch,
                    i.location,
                    i.ccode,
                    i.itemcallnumber,
                    i.notforloan,
                    i.damaged,
                    DATE(i.damaged_on),
                    i.withdrawn,
                    DATE(i.withdrawn_on),
                    i.itemlost,
                    DATE(i.itemlost_on),
                    i.onloan,
                    i.datelastborrowed,
                    i.biblionumber,
                    b.title as titre,
                    bi.publicationyear,
                    bi.itemtype,
                    i.timestamp
                FROM koha{year}.items i
                JOIN koha{year}.biblioitems bi ON i.biblionumber = bi.biblionumber
                JOIN koha{year}.biblio b ON i.biblionumber = b.biblionumber
                WHERE i.notforloan IN (-1, -2, 0, 2, -4, -3, 5)
                AND i.homebranch != 'MUS'"""
items = pd.read_sql(query, db_conn)

In [None]:
# On instancie la classe Document
# La classe Document permet d'ajouter les infos dont ont a besoin pour le SLL

exemplaires = Document(df=items, db_conn=db_conn)
exemplaires.get_doc_statdb_data()
exemplaires.get_doc_es_data()

In [None]:
exemplaires.df.columns

In [None]:
exemplaires.df['sll_acq'] = 'non'
exemplaires.df.loc[exemplaires.df['doc_item_date_creation'].astype('str').str[0:4] == '2023', 'sll_acq' ] = 'oui'

In [None]:
exemplaires_sll = exemplaires.df
exemplaires_sll = exemplaires_sll[['sll_acq', 'sll_public', 'sll_acces', 'sll_collection', 'doc_biblio_id', 'doc_item_id']]

In [None]:
exemplaires_sll['sll_public'].value_counts()

In [None]:
exemplaires_sll['sll_acq'].value_counts()

In [None]:
exemplaires_sll['sll_acces'].value_counts()

In [None]:
query = f"""SELECT
                    itemnumber,
                    barcode,
                    dateaccessioned,
                    homebranch,
                    location,
                    ccode,
                    biblionumber,
                    itemtype
                FROM statdb.stat_eliminations
                WHERE annee_mise_pilon = '{year}'"""
elim = pd.read_sql(query, db_conn)

In [None]:
eliminations = Document(df=elim, db_conn=db_conn)
eliminations.get_doc_statdb_data()
eliminations.get_doc_es_data()

In [None]:
eliminations_sll = eliminations.df
eliminations_sll = eliminations_sll[['sll_public', 'sll_acces', 'sll_collection', 'doc_biblio_id', 'doc_item_id']]

## D1 - Livres imprimés

In [None]:
data = exemplaires_sll[exemplaires_sll['sll_collection'] == 'D1 - Livres imprimés']
data_el = eliminations_sll[eliminations_sll['sll_collection'] == 'D1 - Livres imprimés']

### Fonds
#### UM - Adultes et Enfants

In [None]:
data.groupby(['sll_public'])['doc_item_id'].nunique()

#### UM en libre-accès - Adultes et Enfants

In [None]:
data_libre_acces = data[data['sll_acces'] == 'accès direct']
data_libre_acces.groupby(['sll_public'])['doc_item_id'].nunique()

#### TIT en libre accès - Adultes et Enfants

In [None]:
data_libre_acces = data[data['sll_acces'] == 'accès direct']
data_libre_acces.groupby(['sll_public'])['doc_biblio_id'].nunique()

### Acquisitions

In [None]:
data[data['sll_acq'] == 'oui'].groupby(['sll_public'])['doc_item_id'].nunique()

In [None]:
data[(data['sll_acq'] == 'oui')
     & (data['sll_acces'] == 'accès direct')].groupby(['sll_public'])['doc_biblio_id'].nunique()

#### Eliminations

In [None]:
data_el.groupby(['sll_public'])['doc_item_id'].nunique()

###  TEST LUCAS

In [None]:
data.groupby(['sll_public'])['doc_item_id'].nunique()

In [None]:
import numpy as np
data_fonds = data.pivot_table(index=['sll_public','sll_acces'],
                             values=['doc_item_id','doc_biblio_id'],
                             aggfunc="nunique",
                             #margins=True,
                             #margins_name='Total'
                )
#data_fonds['total'] = data_fonds['doc_biblio_id'] + data_fonds['doc_item_id']

data_fonds_concat = pd.concat([
    y.append(y.sum().rename((x, 'Sous-Total')))
    for x, y in data_fonds.groupby(level=0)]).append(data_fonds.sum().rename(('Enfants + Adultes','Total')))
data_fonds_concat

## D3 - Autres documents

###  Documents cartographiques

In [None]:
data = exemplaires_sll[exemplaires_sll['sll_collection'] == 'D3 – Documents cartographiques']

In [None]:
data['doc_item_id'].nunique()

In [None]:
data[data['sll_acq'] == 'oui']['doc_item_id'].nunique()

###  Musique imprimée

In [None]:
data = exemplaires_sll[exemplaires_sll['sll_collection'] == 'D3 – Musique imprimée']

In [None]:
data['doc_item_id'].nunique()

In [None]:
data[data['sll_acq'] == 'oui']['doc_item_id'].nunique()

In [None]:
data[(data['sll_acces'] == 'accès direct')]['doc_item_id'].nunique()

In [None]:
data[(data['sll_acces'] == 'accès direct')]['doc_biblio_id'].nunique()

In [None]:
data[(data['sll_acces'] == 'accès direct') & (data['sll_acq'] == 'oui')]['doc_biblio_id'].nunique()

###  Documents graphiques

In [None]:
data = exemplaires_sll[exemplaires_sll['sll_collection'] == 'D3 - Documents graphiques']

In [None]:
data['doc_item_id'].nunique()

In [None]:
data[data['sll_acq'] == 'oui']['doc_item_id'].nunique()

###  Autres documents

In [None]:
data = exemplaires_sll[exemplaires_sll['sll_collection'] == 'D3 - Autres documents']

In [None]:
data['doc_item_id'].nunique()

In [None]:
data[data['sll_acq'] == 'oui']['doc_item_id'].nunique()

### Eliminations

In [None]:
data_el = eliminations_sll[eliminations_sll['sll_collection'].str[:2] == 'D3']
len(data_el)

## D4 - Documents audiovisuels sur support

In [None]:
exemplaires_sll['sll_collection'].value_counts()

In [None]:
d4_coll = exemplaires_sll[exemplaires_sll['sll_collection'].str[:2] == 'D4']['sll_collection'].unique().tolist()

In [None]:
for c in d4_coll:
    print(c)
    print('-------------')
    data = exemplaires_sll[exemplaires_sll['sll_collection'] == c]
    data_el = eliminations_sll[eliminations_sll['sll_collection'] == c]
    print(f"fonds : {len(data)}")
    print(f"acquisitions : {len(data[data['sll_acq'] == 'oui'])}")
    print(f"eliminations : {len(data_el)}")
    
    print('-------------') 

In [None]:
data = exemplaires_sll[exemplaires_sll['sll_collection'] == 'D1 - Livres imprimés']
data_el = eliminations_sll[eliminations_sll['sll_collection'] == 'D1 - Livres imprimés']

# E - Usages et usagers de la bibliothèque

## E1 - Usagers

In [None]:
query = f"""SELECT
    date_extraction,
    age as adh_age,
    geo_ville as adh_geo_ville,
    geo_roubaix_iris as adh_geo_rbx_iris_code,
    sexe as adh_sexe,
    inscription_code_carte as adh_inscription_carte_code,
    inscription_code_site as adh_inscription_site_code,
    inscription_attribut as adh_inscription_attribut_action_code,
    inscription_fidelite as adh_inscription_nb_annees_adhesion,
    nb_venues_prets_mediatheque,
    nb_venues_prets_bus
FROM statdb.stat_adherents WHERE date_extraction = '{date_extraction_usagers}'"""
inscrits = pd.read_sql(query, db_conn)

In [None]:
adh = Adherent(df=inscrits, db_conn=db_conn)
adh.get_adherent_statdb_data()
adh.get_adherent_es_data()

In [None]:
adh.df['nb'] = 1
adh.df['adh_age_sll'] = 'Non pertinent'
adh.df.loc[adh.df['adh_age_lib3'] == '0 - 14 ans', 'adh_age_sll'] = '0 - 14 ans'
adh.df.loc[adh.df['adh_age_lib3'] == '15 - 24 ans', 'adh_age_sll'] = '15 - 64 ans'
adh.df.loc[adh.df['adh_age_lib3'] == '25 - 64 ans', 'adh_age_sll'] = '15 - 64 ans'
adh.df.loc[adh.df['adh_age_lib3'] == '65 ans et plus', 'adh_age_sll'] = '65 ans et plus'
adh.df.loc[adh.df['adh_age_lib3'] == 'Inconnu', 'adh_age_sll'] = 'Inconnu'

### Inscrits actifs

In [None]:
data = adh.df

In [None]:
data.groupby(['adh_age_sll', 'adh_sexe'])['nb'].sum()

In [None]:
data.groupby(['adh_age_sll'])['nb'].sum()

In [None]:
data.groupby(['adh_age_lib2', 'adh_sexe'])['nb'].sum()

In [None]:
data[(data['adh_geo_ville'] == 'ROUBAIX') & (~adh.df['adh_age_sll'].isin(['Non pertinent', 'Inconnu']))]['nb'].sum()

### dont nouveaux inscrits

In [None]:
data = adh.df[adh.df['adh_inscription_nb_annees_adhesion'] == 0]
len(data)

In [None]:
data.groupby(['adh_age_sll', 'adh_sexe'])['nb'].sum()

In [None]:
data.groupby(['adh_age_sll'])['nb'].sum()

In [None]:
data.groupby(['adh_age_lib2', 'adh_sexe'])['nb'].sum()

In [None]:
data.groupby(['adh_age_lib2'])['nb'].sum()

In [None]:
data[(data['adh_geo_ville'] == 'ROUBAIX') & (~adh.df['adh_age_sll'].isin(['Non pertinent', 'Inconnu']))]['nb'].sum()

In [None]:
data['adh_inscription_carte_personnalite'].value_counts()

### Emprunteurs actifs

In [None]:
adh.df.columns

In [None]:
adh.df['emprunteur'] = 'non'
adh.df.loc[adh.df['nb_venues_prets_bus'] > 0, 'emprunteur'] = 'oui'
adh.df.loc[adh.df['nb_venues_prets_mediatheque'] > 0, 'emprunteur'] = 'oui'

data = adh.df[adh.df['emprunteur'] == 'oui']
len(data)

In [None]:
data.groupby(['adh_age_sll', 'adh_sexe'])['nb'].sum()

In [None]:
data.groupby(['adh_age_sll'])['nb'].sum()

In [None]:
data.groupby(['adh_age_lib2', 'adh_sexe'])['nb'].sum()

In [None]:
data.groupby(['adh_age_lib2'])['nb'].sum()

In [None]:
data[(data['adh_geo_ville'] == 'ROUBAIX') & (~adh.df['adh_age_sll'].isin(['Non pertinent', 'Inconnu']))]['nb'].sum()

In [None]:
data['adh_inscription_carte_personnalite'].value_counts()

### Fréquentation : entrées dans l'établissement

In [None]:
query = f"""SELECT
    SUM(entrees)
FROM statdb.stat_entrees WHERE YEAR(datetime) = '{year}'"""
entrees = pd.read_sql(query, db_conn)
entrees

## E2 - Prêts

In [None]:
query = f"""SELECT
    issuedate as pret_date,
    date_due as pret_date_retour_prevue,
    returndate as pret_date_retour_effectif,
    renewals as pret_nb_renouvellement,
    branch as pret_site_pret_code,
    age as adh_age,
    sexe as adh_sexe_code,
    ville as adh_geo_ville,
    iris as adh_geo_rbx_iris_code,
    branchcode as adh_inscription_site_code,
    categorycode as adh_inscription_carte_code,
    fidelite as adh_inscription_nb_annees_adhesion,
    itemnumber as doc_item_id,
    homebranch as doc_item_site_detenteur_code,
    location as doc_item_localisation_code,
    ccode as doc_item_collection_ccode,
    itemcallnumber as doc_item_cote,
    itemtype as doc_biblio_support_code,
    publicationyear as doc_biblio_annee_publication,
    biblionumber as doc_biblio_id,
    dateaccessioned as doc_item_date_creation
FROM statdb.stat_issues
WHERE YEAR(issuedate) = '{year}'"""
prts = pd.read_sql(query, db_conn)
len(prts)

In [None]:
c2l = Code2Libelle(db_conn)
c2l.get_val()
c2l = c2l.dict_codes_lib

prets = Pret(df=prts, db_conn=db_conn, c2l=c2l)
prets.get_pret_statdb_data()
prets.get_pret_es_data()

In [None]:
prets_sll = prets.df[['sll_public', 'sll_acces', 'sll_prets_coll', 'sll_collection', 'sll_prets']]
prets_sll['nb'] = 1

In [None]:
prets_sll['sll_prets'].value_counts()

In [None]:
prets_sll['sll_public'].value_counts()

In [None]:
prets_sll['sll_prets_coll'].value_counts()

### Hors collectivités

In [None]:
data = prets_sll[prets_sll['sll_prets_coll'] == 'Pas de prêt aux collectivités']

In [None]:
data.groupby(['sll_prets'])['nb'].sum()

In [None]:
data.groupby(['sll_prets', 'sll_public'])['nb'].sum()

In [None]:
data.groupby(['sll_public'])['nb'].sum()

In [None]:
data['nb'].sum()

### Collectivités

In [None]:
data = prets_sll[prets_sll['sll_prets_coll'] == 'Prêt aux collectivités']
len(data)

In [None]:
data.groupby(['sll_prets'])['nb'].sum()

In [None]:
prets_sll[prets_sll['sll_prets_coll'] == 'Prêt aux collectivités']

## E3 - Autres types d'usages

### Réservations

In [None]:
query = f"""SELECT
    COUNT(*)
FROM statdb.stat_reserves WHERE YEAR(reservedate) = '{year}'"""
r = pd.read_sql(query, db_conn)
r

## E5 - Sessions internet (connexions + wifi)

In [None]:
query = f"""SELECT
    COUNT(*)
FROM statdb.stat_webkiosk WHERE YEAR(heure_deb) = '{year}'"""
r = pd.read_sql(query, db_conn)
r

In [None]:
query = f"""SELECT
    COUNT(*)
FROM statdb.stat_wifi WHERE YEAR(start_wifi) = '{year}'"""
r = pd.read_sql(query, db_conn)
r

### E502 - Nombre de visites du site dans la bibliothèque (visite interne)

In [None]:
query = f"""SELECT SUM(visites) AS 'Total visites interne'
FROM statdb.stat_web2 sw 
WHERE YEAR(`date`) = {year}
AND origine = 'interne'"""

pd.read_sql(query,db_conn)

### E503 - Nombre de visite du site internet hors bibliothèque (visite externe)

In [None]:
query = f"""SELECT SUM(visites) AS 'Total visites externe'
FROM statdb.stat_web2 sw 
WHERE YEAR(`date`) = {year}
AND origine = 'externe'"""

pd.read_sql(query,db_conn)

# H4 - Action culturelle

In [None]:
query = f"SELECT * FROM statdb.stat_action_culturelle WHERE YEAR(`date`)={year}"

In [None]:
ac_2023 = pd.read_sql(query,db_conn)

## H2 - Etablissements culturels

> Pour l'année 2023, je vais sortir une liste à compléter par Mathilde M. pour lister les partenaires par types d'établissement culturel

> Il faudra ensuite importé le tableau complété et faire les requêtes nécessaires

In [None]:
ac_partenariats_23 = ac_2023.groupby(["type","partenariat"])["participants"].sum().to_frame()

In [None]:
ac_partenariats_23.to_excel("../data_lucas/liste_partenariats_action_culturelle_2023_V2.xlsx")

In [None]:
# Création d'une liste des partenaires et du nombre de participant pour chacun
ac_2023.groupby('partenariat')['participants'].sum("total_participants").to_frame().to_excel('../data_lucas/liste_partenariats_action_culturelle_2023.xlsx')

In [None]:
#Import de la liste complété


## H4 Actions au sein de l'établissement

In [96]:
# Ajouter une colonne Conférences, rencontres, lecture = Conférence + Rencontre/conférence + lecture

In [97]:
# Recréer des catégories d'usagers sur la base des données suivantes : 
# Tous publics = toutes les actions qui ne sont pas Enfants
# Enfants = 0-14 ans
# Tous publics = inverse de Enfants et PetiteEnfance
# Enfants = Enfants + Petite Enfance

In [98]:
ac_2023['public_sll'] = np.NaN

In [99]:
ac_2023.loc[ac_2023['public'].isin(['Enfants','Petite enfance']),['public_sll']] = 'Enfants' 

In [100]:
ac_2023.loc[~ac_2023['public'].isin(['Enfants','Petite enfance']),['public_sll']] = 'Tous publics' 

In [101]:
ac_2023.head(5)

Unnamed: 0,id,date,action,lieu,type,public,partenariat,participants,jauge,evenement,notes,public_sll
0,1613,2024-01-03,Jeux vidéo,La criée,Jeux,Tout public,,60.0,,,,Tous publics
1,1614,2024-01-06,Jeux vidéo,La criée,Jeux,Tout public,,55.0,,,,Tous publics
2,1615,2024-01-13,39-45 : elles n'ont rien oublié,La criée,Projection/diffusion,Adultes,,80.0,45.0,,,Tous publics
3,1616,2024-01-14,Tricot des Lulus,La criée,Atelier,Tout public,,22.0,20.0,,,Tous publics
4,1617,2024-01-16,Atelier informatique pour débutants,Espace multimédia,Atelier,Adultes,,10.0,12.0,,,Tous publics


In [102]:
ac_2023['nb_actions'] = 1

In [103]:
# Séances de Contes = Lectures/Contes + Racontées (demander ces chiffres à Chantale et/ou Laetitia)
# Formations = Ateliers Action culturelle + Ateliers informatiques (demander ces chiffres à Maïté)

In [104]:
ac_2023_hors_evenement = ac_2023[ac_2023['evenement'].isna()]

In [105]:
ac_2023_hors_evenement

Unnamed: 0,id,date,action,lieu,type,public,partenariat,participants,jauge,evenement,notes,public_sll,nb_actions
0,1613,2024-01-03,Jeux vidéo,La criée,Jeux,Tout public,,60.0,,,,Tous publics,1
1,1614,2024-01-06,Jeux vidéo,La criée,Jeux,Tout public,,55.0,,,,Tous publics,1
2,1615,2024-01-13,39-45 : elles n'ont rien oublié,La criée,Projection/diffusion,Adultes,,80.0,45.0,,,Tous publics,1
3,1616,2024-01-14,Tricot des Lulus,La criée,Atelier,Tout public,,22.0,20.0,,,Tous publics,1
4,1617,2024-01-16,Atelier informatique pour débutants,Espace multimédia,Atelier,Adultes,,10.0,12.0,,,Tous publics,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
213,1926,2024-12-20,Radio à La Grand-Plage,Salle VDM,Atelier,Adultes,,4.0,15.0,,,Tous publics,1
214,1927,2024-12-21,Atelier de conversation en français,Salle VDM,Atelier,Adultes,,7.0,10.0,,,Tous publics,1
215,1928,2024-12-21,Petit déjeuner jeux vidéo,La criée,Jeux,Familles,,12.0,12.0,,,Tous publics,1
216,1929,2024-12-21,Les racontées musicales,Salle d'heure du conte,Lecture/contes,Petite enfance,,48.0,40.0,,25 enfants et 23 adultes,Enfants,1


> Si on compte dans action dans l'établissement les fêtes etc... Alors pour tous les dataframes sauf le dataframe Fêtes etc... utiliser `ac_2023.loc[ac_2023_hors_evenement]`. Cela permettra de ne pas compter 2 fois le même événement.

### H401 - Expositions

In [106]:
ac_2023.loc[ac_2023['type']=='Exposition',['type_sll']] = 'Exposition'

In [107]:
# Nombre d'exposition dans l'année pour Tous publics
len(ac_2023[(ac_2023['type']=='Exposition')&
            (ac_2023['public_sll']=='Tous publics')]
   )

6

In [108]:
# Nombre d'exposition dans l'année pour enfants
len(ac_2023[(ac_2023['type']=='Exposition')&
            (ac_2023['public_sll']=='Enfants')]
   )

0

### H409 - Conférences, rencontres, lectures

In [109]:
ac_2023.loc[ac_2023['type'].isin(['Conférence','Lecture','Rencontre/conférence']),['type_sll']] = 'Conférences, rencontres, lectures'

In [110]:
# Nombre de conférences, rencontres et lectures TOUT PUBLIC
len(ac_2023[(ac_2023['type_sll']=='Conférences, rencontres, lectures')&
            (ac_2023['public_sll']=='Tous publics')]
   )

30

In [111]:
# Nombre de conférences, rencontres et lectures Enfants
len(ac_2023[(ac_2023['type_sll']=='Conférences, rencontres, lectures')&
        (ac_2023['public_sll']=='Enfants')]
   )

0

In [112]:
# Population touchée
ac_2023.groupby(['type_sll','public_sll'])['participants'].sum()

type_sll                           public_sll  
Conférences, rencontres, lectures  Tous publics    674.0
Exposition                         Tous publics    123.0
Name: participants, dtype: float64

### H413 - Concerts projections

In [113]:
ac_2023.loc[ac_2023['type'].isin(['Concert','Projection','Projection/diffusion']),['type_sll']] = 'Concerts, projections'

In [114]:
len(ac_2023[(ac_2023['type_sll']=='Concerts, projections')&
            (ac_2023['public_sll']=='Tous publics')
           ]
   )

16

In [115]:
len(ac_2023[(ac_2023['type_sll']=='Concerts, projections')&
            (ac_2023['public_sll']=='Enfants')
           ]
   )

8

In [116]:
ac_2023.groupby(['type_sll','public_sll'])['participants'].sum()

type_sll                           public_sll  
Concerts, projections              Enfants         514.0
                                   Tous publics    446.0
Conférences, rencontres, lectures  Tous publics    674.0
Exposition                         Tous publics    123.0
Name: participants, dtype: float64

### H417 - Séances de conte

In [117]:
ac_2023.loc[(ac_2023['type']=='Lecture/contes'),['type_sll']] = 'Séances de conte'

In [118]:
len(ac_2023[(ac_2023['type']=='Séances de conte')&
            (ac_2023['public']=='Tout public')
           ]
   )

0

In [119]:
len(ac_2023[(ac_2023['type']=='Séances de conte')&
            (ac_2023['public']=='Enfants')
           ]
   )

0

In [120]:
ac_2023[ac_2023['type']=='Séances de conte']['participants'].sum()

0.0

### H420 - Clubs de lecteurs ateliers d'écriture

In [121]:
ac_2023.loc[ac_2023['type']=='Club lecture',['type_sll']] = 'Club lecture'

In [122]:
# Nombre total de clubs lectures TOUT PUBLIC
len(ac_2023[(ac_2023['type']=='Club lecture')&
            (ac_2023['public_sll']=='Tous publics')
           ])

7

In [123]:
# Nombre total de clubs lectures ENFANTS
len(ac_2023[(ac_2023['type']=='Club lecture')&
            (ac_2023['public_sll']=='Enfants')
           ])

0

In [124]:
ac_2023[ac_2023['type']=='Club lecture']['participants'].sum()

87.0

### H424 - Fêtes, salons du livre, festivals



In [125]:
ac_2023.loc[ac_2023['evenement'].notna(),['type_sll']] = 'Fêtes, salons du livre, festivals'

In [126]:
ac_2023[ac_2023['type_sll']=='Fêtes, salons du livre, festivals']

Unnamed: 0,id,date,action,lieu,type,public,partenariat,participants,jauge,evenement,notes,public_sll,nb_actions,type_sll
6,1619,2024-01-20,Bulles de lecture - à partir de 4 ans,Espace RDC,Lecture/contes,Enfants,,25.0,,Nuits de la lecture,,Enfants,1,"Fêtes, salons du livre, festivals"
7,1620,2024-01-20,Bulles de lecture 0-3 ans,Salle d'heure du conte,Lecture/contes,Enfants,,17.0,,Nuits de la lecture,10 enfants et 7 adultes,Enfants,1,"Fêtes, salons du livre, festivals"
8,1621,2024-01-20,Zik et bouquins,La criée,Lecture/contes,Familles,,39.0,,Nuits de la lecture,,Tous publics,1,"Fêtes, salons du livre, festivals"
9,1622,2024-01-20,"Between, Ballet du Nord",Espace RDC,Lecture/contes,Tout public,,26.0,,Nuits de la lecture,,Tous publics,1,"Fêtes, salons du livre, festivals"
10,1623,2024-01-21,Le corps fait son cinéma !,Espace RDC,Atelier,Tout public,,28.0,,Nuits de la lecture,,Tous publics,1,"Fêtes, salons du livre, festivals"
11,1624,2024-01-21,Corps accords,La criée,Lecture/contes,Tout public,,25.0,,Nuits de la lecture,,Tous publics,1,"Fêtes, salons du livre, festivals"
18,1631,2024-02-02,Questions de parents : épuisement parental,La criée,Rencontre/conférence,Adultes,,7.0,,Week-end des bébés,,Tous publics,1,"Fêtes, salons du livre, festivals"
23,1636,2024-02-03,À la pêche aux albums !,Salle d'heure du conte,Lecture/contes,Enfants,,28.0,,Week-end des bébés,14 enfants et 14 adultes,Enfants,1,"Fêtes, salons du livre, festivals"
25,1638,2024-02-03,Veillée musicale,Espace Jeunesse,Concert,Enfants,,65.0,,Week-end des bébés,,Enfants,1,"Fêtes, salons du livre, festivals"
26,1639,2024-02-04,Racontée musicale spéciale,Salle d'heure du conte,Lecture/contes,Enfants,,110.0,,Week-end des bébés,,Enfants,1,"Fêtes, salons du livre, festivals"


### H428 - Autres

In [127]:
ac_2023.loc[(ac_2023['type']=='Autre',['type_sll'])] = 'Autre'

In [128]:
len(ac_2023[(ac_2023['type']=='Autre')&
        (ac_2023['public']=='Tout public')
       ]
   )

5

In [129]:
len(ac_2023[(ac_2023['type']=='Autre')&
        (ac_2023['public']=='Enfants')
       ]
   )

1

In [130]:
ac_2023[ac_2023['type_sll']=='Autre']

Unnamed: 0,id,date,action,lieu,type,public,partenariat,participants,jauge,evenement,notes,public_sll,nb_actions,type_sll
52,1665,2024-03-23,Speed-dating patrimonial,Magasins,Autre,Adultes,,14.0,,,,Tous publics,1,Autre
54,1667,2024-03-23,Dictée,La criée,Autre,Adultes,,41.0,,Semaine Langue française & francophonie,,Tous publics,1,Autre
79,1692,2024-05-12,Vide dressing,La criée,Autre,Tout public,Do it Céline Caffè,35.0,,,,Tous publics,1,Autre
103,1721,2024-06-08,Trésors du Patrimoine et des Archives,La criée,Autre,Tout public,,8.0,,À la découverte des manuscrits,,Tous publics,1,Autre
105,1723,2024-06-09,Trésors du Patrimoine et des Archives,La criée,Autre,Tout public,,6.0,,À la découverte des manuscrits,,Tous publics,1,Autre
139,1852,2024-09-28,Grande braderie de La Grand-Plage,Espace RDC,Autre,Tout public,,0.0,,,,Tous publics,1,Autre
140,1853,2024-09-29,Vide dressing,Espace RDC,Autre,Tout public,Do It Céline Caffè,60.0,50.0,,,Tous publics,1,Autre
158,1871,2024-10-12,Visites,Toute la médiathèque,Autre,Adultes,,32.0,30.0,Nuit des bibliothèques,,Tous publics,1,Autre
167,1880,2024-10-19,Graines de philo enfant,Salle d'heure du conte,Autre,Enfants,,15.0,15.0,,,Enfants,1,Autre
176,1889,2024-11-10,Karaoké en famille,La criée,Autre,Familles,,64.0,40.0,,,Tous publics,1,Autre


### Formations au public

In [131]:
ac_2023[ac_2023['type']=='Atelier']

Unnamed: 0,id,date,action,lieu,type,public,partenariat,participants,jauge,evenement,notes,public_sll,nb_actions,type_sll
3,1616,2024-01-14,Tricot des Lulus,La criée,Atelier,Tout public,,22.0,20.0,,,Tous publics,1,
4,1617,2024-01-16,Atelier informatique pour débutants,Espace multimédia,Atelier,Adultes,,10.0,12.0,,,Tous publics,1,
5,1618,2024-01-19,Radio à La Grand-Plage,Salle VDM,Atelier,Adultes,,15.0,10.0,,,Tous publics,1,
10,1623,2024-01-21,Le corps fait son cinéma !,Espace RDC,Atelier,Tout public,,28.0,,Nuits de la lecture,,Tous publics,1,"Fêtes, salons du livre, festivals"
12,1625,2024-01-24,Dans tes rêves,La criée,Atelier,Enfants,La Condition Publique,45.0,,,,Enfants,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199,1912,2024-12-06,Radio à La Grand-Plage,Salle VDM,Atelier,Adultes,,10.0,15.0,,,Tous publics,1,
206,1919,2024-12-13,Radio à La Grand-Plage,Salle VDM,Atelier,Adultes,,8.0,15.0,,,Tous publics,1,
211,1924,2024-12-15,Mon maquillage Zéro Déchet pour les fêtes !,La criée,Atelier,Tout public,,18.0,20.0,,,Tous publics,1,
213,1926,2024-12-20,Radio à La Grand-Plage,Salle VDM,Atelier,Adultes,,4.0,15.0,,,Tous publics,1,


In [132]:
ac_2023.pivot_table(index=['type_sll','public_sll'],
                    values='nb_actions',
                    aggfunc=sum
                   )

Unnamed: 0_level_0,Unnamed: 1_level_0,nb_actions
type_sll,public_sll,Unnamed: 2_level_1
Autre,Enfants,1
Autre,Tous publics,10
Club lecture,Tous publics,6
"Concerts, projections",Enfants,6
"Concerts, projections",Tous publics,13
"Conférences, rencontres, lectures",Tous publics,24
Exposition,Tous publics,1
"Fêtes, salons du livre, festivals",Enfants,12
"Fêtes, salons du livre, festivals",Tous publics,34
Séances de conte,Enfants,9


In [133]:
ac_2023.pivot_table(index='type_sll',
                    values='participants',
                    aggfunc=sum
                   )

Unnamed: 0_level_0,participants
type_sll,Unnamed: 1_level_1
Autre,279.0
Club lecture,72.0
"Concerts, projections",720.0
"Conférences, rencontres, lectures",555.0
Exposition,18.0
"Fêtes, salons du livre, festivals",1628.0
Séances de conte,417.0


# K - Patrimoine

## K2 - Fonds et acquisitions

In [5]:
query = f"""SELECT
                    i.itemnumber,
                    i.barcode,
                    i.dateaccessioned,
                    i.price,
                    i.homebranch,
                    i.holdingbranch,
                    i.location,
                    i.ccode,
                    i.itemcallnumber,
                    i.notforloan,
                    i.damaged,
                    DATE(i.damaged_on),
                    i.withdrawn,
                    DATE(i.withdrawn_on),
                    i.itemlost,
                    DATE(i.itemlost_on),
                    i.onloan,
                    i.datelastborrowed,
                    i.biblionumber,
                    b.title as titre,
                    bi.publicationyear,
                    bi.itemtype,
                    i.timestamp
                FROM koha{year}.items i
                JOIN koha{year}.biblioitems bi ON i.biblionumber = bi.biblionumber
                JOIN koha{year}.biblio b ON i.biblionumber = b.biblionumber
                WHERE i.notforloan IN (-1, -2, 0, 2, -4, 5)
                AND i.homebranch != 'MUS'"""
items = pd.read_sql(query, db_conn)

In [6]:
exemplaires = Document(df=items, db_conn=db_conn)
exemplaires.get_doc_statdb_data()
exemplaires.get_doc_es_data()

In [7]:
pat_ccode = ['PENACZZ', 'PENCVZZ', 'PENDEZZ', 'PENHPZZ', 'PENPDZZ', 'PENRSZZ', 'AAPPRLP',
             'PPAFIZZ', 'PPEFGZZ', 'PPELGZZ', 'PPEPMZZ', 'PPEPRZZ']
patrimoine_df = exemplaires.df[exemplaires.df['doc_item_collection_ccode'].isin(pat_ccode)]
patrimoine_df['sll_acq'] = 'non'
patrimoine_df.loc[patrimoine_df['doc_item_date_creation'].astype('str').str[0:4] == '2024', 'sll_acq' ] = 'oui'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  patrimoine_df['sll_acq'] = 'non'


In [11]:
len(patrimoine_df)

61937

### Livres imprimés

In [12]:
data = patrimoine_df[patrimoine_df['sll_collection'] == 'D1 - Livres imprimés']

In [13]:
len(data)

35901

In [14]:
len(data[data['sll_acq'] == 'oui'])

365

In [15]:
data['publicationyear_'] = data['publicationyear'].str.extract(r'(^\d{4})').astype(float)
l1811 = data[data['publicationyear_'] < 1811]
len(l1811)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['publicationyear_'] = data['publicationyear'].str.extract(r'(^\d{4})').astype(float)


1055

In [16]:
len(l1811[l1811['sll_acq'] == 'oui'])

0

In [17]:
len(patrimoine_df[patrimoine_df['doc_item_collection_ccode'] == 'AAPPRLP'])

546

In [18]:
len(patrimoine_df[(patrimoine_df['doc_item_collection_ccode'] == 'AAPPRLP') & (patrimoine_df['sll_acq'] == 'oui')])

0

### Publications en séries : ne pas traiter

In [20]:
#data = patrimoine_df[patrimoine_df['sll_collection'] == 'D1 - Publications en série imprimées']
#len(data)

In [21]:
#len(data[data['sll_acq'] == 'oui'])

### Cartes

In [22]:
data = patrimoine_df[patrimoine_df['sll_collection'] == 'D3 – Documents cartographiques']
print(f"Fonds : {len(data)}")
print(f"Acquisitions : {len(data[data['sll_acq'] == 'oui'])}")

Fonds : 0
Acquisitions : 0


### Partitions

In [23]:
data = patrimoine_df[patrimoine_df['sll_collection'] == 'D3 – Musique imprimée']
print(f"Fonds : {len(data)}")
print(f"Acquisitions : {len(data[data['sll_acq'] == 'oui'])}")

Fonds : 1661
Acquisitions : 4


### Documents iconographiques: aucun sens, signalé dans bn-r

In [24]:
# data = patrimoine_df[patrimoine_df['sll_collection'] == 'D3 - Documents graphiques']
# print(f"Fonds : {len(data)}")
# print(f"Acquisitions : {len(data[data['sll_acq'] == 'oui'])}")

### Documents sonores

In [25]:
data = patrimoine_df[patrimoine_df['sll_collection'].isin(['D4 - Documents audiovisuels fonds adultes / Documents sonores : musique',
                                                           'D4 - Documents audiovisuels fonds adultes / Documents sonores : livres enregistrés'])]
print(f"Fonds : {len(data)}")
print(f"Acquisitions : {len(data[data['sll_acq'] == 'oui'])}")

Fonds : 22538
Acquisitions : 1045


### Images animées

In [26]:
data = patrimoine_df[patrimoine_df['sll_collection'].isin(['D4 - Documents audiovisuels fonds adultes / documents vidéo adultes',
                                                           'D4 - Documents audiovisuels fonds enfants / documents vidéo enfants'])]
print(f"Fonds : {len(data)}")
print(f"Acquisitions : {len(data[data['sll_acq'] == 'oui'])}")

Fonds : 895
Acquisitions : 0


In [9]:
patrimoine_df[patrimoine_df['doc_item_collection_ccode'] == 'PPAFIZZ']

Unnamed: 0,itemnumber,barcode,dateaccessioned,price,homebranch,holdingbranch,location,ccode,itemcallnumber,notforloan,...,doc_statut_abime,doc_statut_desherbe,doc_statut_perdu,doc_usage_emprunt,doc_usage_date_dernier_pret_annee,doc_biblio_support,sll_public,sll_acces,sll_collection,sll_acq
1493,3436,C0002444543,2005-03-22,64.79,MED,MED,MED3C,PPAFIZZ,FL VHS/Image 201,2,...,Non,Non,Non,non,,"VHS, UMATIC ou film",adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
6396,12460,C0001418272,2005-03-23,,MED,MED,MED3C,PPAFIZZ,FL VHS/Image 94,2,...,Non,Non,Non,non,,"VHS, UMATIC ou film",adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
6397,12461,C0001560532,2005-03-23,,MED,MED,MED3C,PPAFIZZ,FL VHS/Image 95,2,...,Non,Non,Non,non,,"VHS, UMATIC ou film",adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
6398,12462,C0001560524,2005-03-23,,MED,MED,MED3C,PPAFIZZ,FL VHS/Image 96,2,...,Non,Non,Non,non,,"VHS, UMATIC ou film",adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
6684,13017,C0001557979,2005-03-23,15.24,MED,MED,MED3C,PPAFIZZ,FL VHS/Image 97,2,...,Non,Non,Non,non,,"VHS, UMATIC ou film",adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130172,268984,C0003610074,2011-06-09,,MED,MED,MED3C,PPAFIZZ,FL Image 633,2,...,Non,Non,Non,non,,DVD,adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
130173,268985,C0003610016,2011-06-09,,MED,MED,MED3C,PPAFIZZ,FL Image 634,2,...,Non,Non,Non,non,,DVD,adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
134838,276993,C0005486441,2012-01-12,,MED,MED,MED3C,PPAFIZZ,FL Image / 637,2,...,Non,Non,Non,non,,DVD,adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
147810,300472,C0005356683,2013-11-28,,MED,MED,MED3C,PPAFIZZ,FL Image/638,2,...,Non,Non,Non,non,,DVD,adultes,accès indirect,D4 - Documents audiovisuels fonds adultes / do...,non
