In [63]:
import pandas as pd
import re
import duckdb
from IPython.core.magic import register_cell_magic
from IPython.display import display, HTML

In [79]:
# Column-oriented accumulator for the parsed signatures; each key becomes a
# column of `coreutils_df` below.
coreutils_function_types_data = {
    "function_name": [],
    "argument_types": [],
    "return_type": [],
}

# Each record has the shape: <function_name>,args:[<type>;<type>;...],ret:<return_type>
pattern = r"(.*),args:\[(.*)\],ret:(.*)"
signature_regex = re.compile(pattern)

with open('data/coreutils_function_types.csv', 'r') as file:
    for line_number, line in enumerate(file, start=1):
        # Blank lines (e.g. a trailing newline at end of file) carry no record.
        if not line.strip():
            continue

        match = signature_regex.search(line)
        if match is None:
            # Fail with the offending line instead of an opaque AttributeError
            # on `None.group(...)`.
            raise ValueError(
                f"line {line_number} is not a function signature record: {line!r}"
            )

        function_name, raw_argument_types, return_type = match.groups()

        # The argument list is ';'-terminated. Strip only a real trailing
        # separator so the final type is not lost when the terminator is absent.
        if raw_argument_types.endswith(";"):
            raw_argument_types = raw_argument_types[:-1]
        argument_types = raw_argument_types.split(";") if raw_argument_types else []

        coreutils_function_types_data["function_name"].append(function_name)
        coreutils_function_types_data["argument_types"].append(argument_types)
        coreutils_function_types_data["return_type"].append(return_type)

# One row per parsed line; `argument_types` holds a Python list per row.
coreutils_df = pd.DataFrame(coreutils_function_types_data)
# One row per (function, argument type) pair; `explode` keeps a row with a
# missing value for functions whose argument list is empty.
coreutils_argument_types_df = coreutils_df.explode("argument_types")

In [80]:
# In-memory DuckDB connection used by the SQL cells in this notebook.
db = duckdb.connect(database=':memory:')

# Expose each pandas frame to SQL under the view name the queries reference.
for view_name, frame in (
    ('CoreUtilsFunctions', coreutils_df),
    ('CoreUtilsArgumentTypes', coreutils_argument_types_df),
):
    db.register(view_name, frame)

# Bare expression so the cell still displays the connection object.
db

<duckdb.duckdb.DuckDBPyConnection at 0x156ef5a30>

In [64]:
def db_query(line, cell):
    """Run the cell's text as one query on `db` and show the result as an HTML table.

    Args:
        line: Text that follows ``%%db_query`` on the magic line; it becomes
            the first line of the query.
        cell: The rest of the cell, appended after ``line`` on a new line.

    Returns:
        None. The fetched DataFrame is only displayed.
    """
    sql_text = '\n'.join((line, cell))
    result_frame = db.execute(sql_text).fetchdf()
    rendered_table = HTML(result_frame.to_html())
    display(rendered_table)

# Make the function available as the `%%db_query` cell magic.
get_ipython().register_magic_function(db_query, magic_kind='cell')

In [65]:
%%db_query
-- Show the parsed signatures as registered from the functions frame:
-- function name, list of argument types, and return type.
SELECT signatures.*
FROM CoreUtilsFunctions AS signatures

Unnamed: 0,function_name,argument_types,return_type
0,asnprintf,"[i8*, i64*, i8*]",i8*
1,rpl_asprintf,"[i8**, i8*]",i32
2,rpl_fopen,"[i8*, i8*]",%struct._IO_FILE*
3,memcpy.inline,"[i8*, i8*, i64]",i8*
4,orig_fopen,"[i8*, i8*]",%struct._IO_FILE*
5,mktime_internal,"[%struct.tm*, %struct.tm* (i64*, %struct.tm*)*, i64*]",i64
6,leapyear,[i64],i1
7,ydhms_diff,"[i64, i64, i32, i32, i32, i32, i32, i32, i32, i32]",i64
8,ranged_convert,"[%struct.tm* (i64*, %struct.tm*)*, i64*, %struct.tm*]",%struct.tm*
9,tm_diff,"[i64, i64, i32, i32, i32, %struct.tm*]",i64


In [85]:
%%db_query
-- Frequency of each argument type, most common first. The view is the
-- exploded frame, so each row pairs a function with one of its argument types.
-- NOTE(review): the blank/null group presumably comes from functions with an
-- empty argument list -- confirm.
SELECT
    argument_types,
    COUNT(*) AS count
FROM CoreUtilsArgumentTypes
GROUP BY argument_types
ORDER BY 2 DESC

Unnamed: 0,argument_types,count
0,i8*,2168
1,i32,1337
2,i64,1097
3,,329
4,i8**,281
5,%struct._IO_FILE*,261
6,i8,242
7,i1,204
8,i64*,174
9,%struct.stat*,130


In [66]:
%%db_query
-- Number of distinct signatures per return type, most common first.
WITH
    UniqueSignatures AS (
        -- Collapse repeated (name, argument types, return type) rows so each
        -- signature is counted once.
        SELECT DISTINCT
            function_name,
            argument_types,
            return_type
        FROM CoreUtilsFunctions
    )
SELECT
    return_type,
    COUNT(*) AS num_instances
FROM UniqueSignatures
GROUP BY return_type
ORDER BY 2 DESC

Unnamed: 0,return_type,num_instances
0,void,680
1,i32,523
2,i1,422
3,i8*,230
4,i64,215
5,[2 x i64],12
6,%struct.valinfo*,11
7,fp128,9
8,i32*,8
9,%struct._IO_FILE*,8


In [70]:
%%db_query
-- Number of distinct signatures per (return type, sorted argument types)
-- combination, most common first.
WITH
    UniqueSignatures AS (
        -- Collapse repeated (name, argument types, return type) rows so each
        -- signature is counted once.
        SELECT DISTINCT
            function_name,
            argument_types,
            return_type
        FROM CoreUtilsFunctions
    )
SELECT
    return_type,
    -- NOTE(review): sorting discards parameter order, so argument lists that
    -- differ only in order fall into the same group -- confirm this is intended.
    ARRAY_SORT(argument_types) AS argument_types,
    COUNT(*) AS num_instances
FROM UniqueSignatures
-- Positional grouping on purpose: the alias `argument_types` shadows the
-- source column, and position 2 is unambiguously the sorted list.
GROUP BY 1, 2
ORDER BY 3 DESC

Unnamed: 0,return_type,argument_types,num_instances
0,void,[],114
1,i32,"[i8*, i8*]",95
2,void,[i8*],53
3,i1,[i32],52
4,i32,[i32],49
5,i1,[i8*],32
6,i64,"[i64, i8*]",27
7,i8*,[i8*],27
8,i1,[],25
9,i1,"[i8*, i8*]",23


In [99]:
%%db_query
-- Count the distinct signatures that take at least one argument and use only
-- the integer types i1/i8/i16/i32/i64 for every argument and the return value,
-- next to the total number of distinct signatures.
WITH
    UniqueSignatures AS (
        -- Collapse repeated (name, argument types, return type) rows so each
        -- signature is counted once.
        SELECT DISTINCT
            function_name,
            argument_types,
            return_type
        FROM CoreUtilsFunctions
    )
SELECT
    -- NOTE(review): the cast presumably narrows COUNT_IF's integer result to a
    -- type that converts cleanly to pandas -- confirm.
    CAST(COUNT_IF(
        return_type IN ('i1', 'i8', 'i16', 'i32', 'i64')
        AND LEN(argument_types) > 0
        -- True when every element of argument_types occurs in the literal list.
        AND LIST_HAS_ALL(['i1', 'i8', 'i16', 'i32', 'i64'], argument_types)
    ) AS UINT64) AS num_primitive_functions,
    COUNT(*) AS num_functions
FROM UniqueSignatures

Unnamed: 0,num_primitive_functions,num_functions
0,224,2206


In [100]:
%%db_query
-- List the distinct signatures that take at least one argument and use only
-- the integer types i1/i8/i16/i32/i64 for every argument and the return value.
WITH
    UniqueSignatures AS (
        -- Collapse repeated (name, argument types, return type) rows so each
        -- signature appears once.
        SELECT DISTINCT
            function_name,
            argument_types,
            return_type
        FROM CoreUtilsFunctions
    )
SELECT
    function_name,
    argument_types,
    return_type
FROM UniqueSignatures
WHERE
    return_type IN ('i1', 'i8', 'i16', 'i32', 'i64')
    AND LEN(argument_types) > 0
    -- True when every element of argument_types occurs in the literal list.
    AND LIST_HAS_ALL(['i1', 'i8', 'i16', 'i32', 'i64'], argument_types)

Unnamed: 0,function_name,argument_types,return_type
0,ydhms_diff,"[i64, i64, i32, i32, i32, i32, i32, i32, i32, i32]",i64
1,posixtest,[i32],i1
2,isubase64,[i8],i1
3,isubase64url,[i8],i1
4,base16_length,[i32],i32
5,base2_length,[i32],i32
6,isuz85,[i8],i1
7,c_isalnum,[i32],i1
8,write_zeros,"[i32, i64]",i1
9,multiple_bits_set,[i32],i1
