In [1]:
import pandas as pd
import numpy as np
import os
import subprocess
from tqdm.notebook import tqdm
from collections import defaultdict

In [2]:
train_df = pd.read_csv("../data/c++/train_formattted_processed.csv")
train_df.head()

Unnamed: 0,id,text,code,question,functions,code_context
0,0,Maximum Prefix Sum possible by merging two giv...,#include <bits/stdc++.h>\nusing namespace std;...,Maximum Prefix Sum possible by merging two giv...,"['int maxPresum(vector<int> a, vector<int> b) ...",#include <bits/stdc++.h>\nusing namespace std;...
1,1,Check if a number can be represented as sum of...,#include <bits/stdc++.h>\nusing namespace std;...,Check if a number can be represented as sum of...,"['bool sumOfTwoCubes(int n) {', 'int main() {']",#include <bits/stdc++.h>\nusing namespace std;...
2,3,Nth natural number after removing all numbers ...,#include <bits/stdc++.h>\nusing namespace std;...,Nth natural number after removing all numbers ...,"['long findNthNumber(long long N) {', 'int mai...",#include <bits/stdc++.h>\nusing namespace std;...
3,4,Check if an integer is rotation of another giv...,#include <bits/stdc++.h>\nusing namespace std;...,Check if an integer is rotation of another giv...,"['int check(int A, int B) {', 'int main() {']",#include <bits/stdc++.h>\nusing namespace std;...
4,5,Count of quadruples with product of a pair equ...,#include <bits/stdc++.h>\nusing namespace std;...,Count of quadruples with product of a pair equ...,"['void sameProductQuadruples(int nums[], int N...",#include <bits/stdc++.h>\nusing namespace std;...


In [3]:
print(train_df.loc[0]['code'])

#include <bits/stdc++.h>
using namespace std;
int maxPresum(vector<int> a, vector<int> b) {
  int X = max(a[0], 0);
  for (int i = 1; i < a.size(); i++) {
    a[i] += a[i - 1];
    X = max(X, a[i]);
  }
  int Y = max(b[0], 0);
  for (int i = 1; i < b.size(); i++) {
    b[i] += b[i - 1];
    Y = max(Y, b[i]);
  }
  return X + Y;
}
int main() {
  vector<int> A = {2, -1, 4, -5};
  vector<int> B = {4, -3, 12, 4, -3};
  cout << maxPresum(A, B) << endl;
}


In [5]:
def save_file(path, code):
    with open(path, 'w', encoding='utf-8') as file:
        file.write(code)

def save_to_code_file(directory, id, code, ext, pss=1):
    save_file(f"{directory}/question-{id}-raw-{pss}.{ext}", code)

In [6]:
CPP_SAVE_PATH = './code_files/cpp/'

In [7]:
train_df.apply(lambda row: save_to_code_file(CPP_SAVE_PATH, row.id, row.code, "cpp"), axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
6258    None
6259    None
6260    None
6261    None
6262    None
Length: 6263, dtype: object

In [7]:
os.chdir("./code_files/cpp/")

cppcheck --enable=all --suppress=missingIncludeSystem --check-level=exhaustive --xml "$source_directory" 2> "$report_directory/error_file.xml"

In [8]:
folder_path = './'
file_list = os.listdir(folder_path)

for file_name in tqdm(file_list):
    if file_name.endswith(".cpp"):
        file_prefix = file_name.split(".")[0]
        # command = f"flake8 --format=json-pretty {folder_path}{file_name} > ./analysis_reports/{file_name}.json"
        proc = subprocess.Popen(["cppcheck", "--enable=all", "--suppress=missingIncludeSystem", "--check-level=exhaustive", "--xml", f"{folder_path}{file_name}"], stdout=open(f"../../analysis_reports_cpp/{file_prefix}.xml", "w"), stderr=open(f"../../analysis_reports_cpp/{file_prefix}.xml", "w"))
        proc.wait()

  0%|          | 0/6263 [00:00<?, ?it/s]

In [9]:
file_prefix = "error_file"

In [None]:
proc = subprocess.Popen(["cppcheck", "--enable=all", "--suppress=missingIncludeSystem", "--check-level=exhaustive", "--xml", "./"], stderr=open(f"../../analysis_reports_cpp/{file_prefix}.xml", "w"))
proc.wait()

In [11]:
import xml.etree.ElementTree as ET

def parse_xml_to_dict(file_path):
    tree = ET.parse(file_path)
    root = tree.getroot()
    data = defaultdict(list)
    errors = root.find('errors')
    for i, error in enumerate(errors.findall('error')):
        error_data = []
        for child in error:
            if child.tag == 'location':
                error_data.append(child.attrib)
        data[f"{error.attrib['id']}"].extend(error_data)
    return data

    

parsed_data = parse_xml_to_dict('../../analysis_reports_cpp/error_file.xml')


In [12]:
len(parsed_data)

95

In [13]:
len(parsed_data['constParameterReference'])

129

In [14]:
parsed_data['constParameterReference'][0]

{'file': 'question-10-pass-1.cpp',
 'line': '3',
 'column': '35',
 'info': "Parameter 'b' can be declared as reference to const"}

In [15]:
files_with_errors = defaultdict(set)

for error_type, errors in parsed_data.items():
    for error in errors:
        files_with_errors[error_type].add(error['file'])

In [16]:
len(files_with_errors)

95

In [17]:
for error_type, files in files_with_errors.items():
    print(f"Error {error_type} occurs in {len(files)} in files.")

Error constParameterReference occurs in 111 in files.
Error passedByValue occurs in 752 in files.
Error variableScope occurs in 183 in files.
Error unreadVariable occurs in 232 in files.
Error constParameter occurs in 1306 in files.
Error shadowVariable occurs in 68 in files.
Error uninitvar occurs in 89 in files.
Error legacyUninitvar occurs in 10 in files.
Error syntaxError occurs in 242 in files.
Error noExplicitConstructor occurs in 26 in files.
Error knownConditionTrueFalse occurs in 221 in files.
Error nonStandardCharLiteral occurs in 37 in files.
Error missingInclude occurs in 38 in files.
Error shiftTooManyBitsSigned occurs in 12 in files.
Error integerOverflow occurs in 12 in files.
Error useStlAlgorithm occurs in 98 in files.
Error cstyleCast occurs in 25 in files.
Error arrayIndexThenCheck occurs in 19 in files.
Error constParameterPointer occurs in 57 in files.
Error unusedVariable occurs in 35 in files.
Error stlFindInsert occurs in 11 in files.
Error constVariablePointer 

In [18]:
len(files_with_errors['constParameter'])

1306

In [19]:
len(files_with_errors['unusedFunction'])

10

In [20]:
len(files_with_errors['passedByValue'])

752

In [21]:
len(set.difference(files_with_errors['constParameter'], files_with_errors['passedByValue']))

1296

In [22]:
files_with_errors['constParameter']

{'question-1583-pass-1.cpp',
 'question-7723-pass-1.cpp',
 'question-3584-pass-1.cpp',
 'question-5012-pass-1.cpp',
 'question-7970-pass-1.cpp',
 'question-8735-pass-1.cpp',
 'question-196-pass-1.cpp',
 'question-5489-pass-1.cpp',
 'question-9327-pass-1.cpp',
 'question-3144-pass-1.cpp',
 'question-4800-pass-1.cpp',
 'question-9482-pass-1.cpp',
 'question-1202-pass-1.cpp',
 'question-5650-pass-1.cpp',
 'question-4126-pass-1.cpp',
 'question-6816-pass-1.cpp',
 'question-5717-pass-1.cpp',
 'question-4352-pass-1.cpp',
 'question-9425-pass-1.cpp',
 'question-4570-pass-1.cpp',
 'question-3759-pass-1.cpp',
 'question-6029-pass-1.cpp',
 'question-7752-pass-1.cpp',
 'question-4283-pass-1.cpp',
 'question-3800-pass-1.cpp',
 'question-7836-pass-1.cpp',
 'question-3279-pass-1.cpp',
 'question-7880-pass-1.cpp',
 'question-7229-pass-1.cpp',
 'question-3314-pass-1.cpp',
 'question-1529-pass-1.cpp',
 'question-1205-pass-1.cpp',
 'question-361-pass-1.cpp',
 'question-1186-pass-1.cpp',
 'question-4169-