#### Analysis of LAPACKE functions

In [7]:
import pandas as pd

import re

def extract_function_names(header_file):
    with open(header_file, 'r') as file:
        content = file.read()

    pattern = r'\b\w+\s+(\w+)\s*\('
    function_names = re.findall(pattern, content)

    return function_names

names = pd.DataFrame(extract_function_names('cblas.h'), columns=["fct_name"])

0      False
1      False
2      False
3      False
4      False
       ...  
159    False
160    False
161    False
162    False
163    False
Name: fct_name, Length: 164, dtype: bool
Empty DataFrame
Columns: [fct_name, fct_type]
Index: []


In [26]:
import re
import pandas as pd

### Generated using CHATGPT

def extract_function_info(file_path):
    function_info = []
    pattern =  r"(\w+\s+)(\w+)\s*\((.*?)\);"

    with open(file_path, 'r') as file:
        content = file.read()

        matches = re.findall(pattern, content, re.DOTALL)
        for match in matches:
            return_type = match[0].strip()

            function_name = match[1]
            if ~ (names["fct_name"] == function_name).any():
                continue
            
            arguments = match[2].split(',')

            for i, arg in enumerate(arguments):
                arg = arg.strip()
                arg_parts = arg.rsplit(' ', 1)
                arg_type = arg_parts[0].strip()
                arg_name = arg_parts[1].strip()

                if '*' in arg_name:
                    arg_name = arg_name.replace('*','')
                    arg_type = arg_type + '*'
                
                function_info.append({
                    "fct_name": function_name,
                    "return_type": return_type,
                    "e_name": function_name.split("_")[1],
                    "arg_pos": i,
                    "arg_type": arg_type,
                    "arg_name": arg_name
                })

    return pd.DataFrame(function_info)

# Usage example
db = extract_function_info('cblas.h')
print(len(db["fct_name"].unique()))

Empty DataFrame
Columns: [types]
Index: []
          fct_name return_type  e_name  arg_pos      arg_type arg_name type
0     cblas_sdsdot       float  sdsdot        0     const int        N  NaN
1     cblas_sdsdot       float  sdsdot        1   const float    alpha  NaN
2     cblas_sdsdot       float  sdsdot        2  const float*        X  NaN
3     cblas_sdsdot       float  sdsdot        3     const int     incX  NaN
4     cblas_sdsdot       float  sdsdot        4  const float*        Y  NaN
...            ...         ...     ...      ...           ...      ...  ...
1230  cblas_zher2k        void  zher2k        8   const void*        B  NaN
1231  cblas_zher2k        void  zher2k        9     const int      ldb  NaN
1232  cblas_zher2k        void  zher2k       10  const double     beta  NaN
1233  cblas_zher2k        void  zher2k       11         void*        C  NaN
1234  cblas_zher2k        void  zher2k       12     const int      ldc  NaN

[1235 rows x 7 columns]
142


### Comparing functions invented/expected

In [64]:

missing = names[~names["fct_name"].isin(db["fct_name"].unique())]
print(f"Missing:{len(missing['fct_name'].unique())}")

print("Added")
print(db[~db["fct_name"].isin(names["fct_name"])])

Missing:0
Added
Empty DataFrame
Columns: [fct_name, arg_pos, arg_type, arg_name]
Index: []


In [65]:
names = names[~names["fct_name"].isin(missing["fct_name"])]
names.to_csv("blas_names.csv")

### Obtained data types

In [73]:
data_types = db["arg_type"].unique()
print(data_types)

['const int' 'const float' 'const float*' 'const double*' 'const void*'
 'void*' 'float*' 'double*' 'const double' 'const enum CBLAS_ORDER'
 'const enum CBLAS_TRANSPOSE' 'const enum CBLAS_UPLO'
 'const enum CBLAS_DIAG' 'const enum CBLAS_SIDE']


In [74]:
for dt in data_types:
    print(dt)
    dt_df = db[db["arg_type"]==dt]
    data = {
        "names": dt_df["arg_name"].unique(),
        "pos": dt_df["arg_pos"].unique(),
        "ocs": dt_df.size
    }
    print(data)

const int
{'names': array(['N', 'incX', 'incY', 'M', 'lda', 'KL', 'KU', 'K', 'ldb', 'ldc'],
      dtype=object), 'pos': array([ 0,  3,  5,  2,  4,  6,  8, 11, 10, 13,  7,  9,  1, 12]), 'ocs': 2470}
const float
{'names': array(['alpha', 'b2', 'c', 's', 'beta'], dtype=object), 'pos': array([ 1,  3,  5,  6,  4,  9, 11,  8,  7, 10]), 'ocs': 185}
const float*
{'names': array(['X', 'Y', 'P', 'A', 'Ap', 'B'], dtype=object), 'pos': array([2, 4, 1, 3, 5, 7, 9, 6, 8]), 'ocs': 225}
const double*
{'names': array(['X', 'Y', 'P', 'A', 'Ap', 'B'], dtype=object), 'pos': array([1, 3, 2, 5, 7, 9, 6, 4, 8]), 'ocs': 205}
const void*
{'names': array(['X', 'Y', 'alpha', 'A', 'beta', 'Ap', 'B'], dtype=object), 'pos': array([ 1,  3,  2,  4,  5,  7,  9,  6, 11,  8, 10]), 'ocs': 780}
void*
{'names': array(['dotu', 'dotc', 'X', 'Y', 'A', 'Ap', 'C', 'B'], dtype=object), 'pos': array([ 5,  1,  3,  4,  2, 10, 12,  7,  8,  6,  9, 11]), 'ocs': 340}
float*
{'names': array(['X', 'Y', 'a', 'b', 'c', 's', 'd1', 'd2', 'b1