In [1]:
%%writefile sum.cpp
#include<iostream>
#include<algorithm>
using namespace std;

constexpr int M = 2048;
constexpr int N = 2048;

double naive_sum(const double a[][N]){
    double sum = 0.0;
    for(int j = 0; j < N; ++j) {
        for(int i = 0; i < M; ++i)
            sum += a[i][j];
    }
    return sum;
}

double improved_sum(const double a[][N]) {
    double sum = 0.0;
    for(int i = 0; i < M; ++i)
        for(int j = 0; j < N; ++j)
            sum +=a[i][j];
    return sum;
}

int main() {
    static double a[M][N];
    fill_n(&a[0][0], M*N, 1.0 / (M*N));
    cout << naive_sum(a) << endl;
    static double b[M][N];
    fill_n(&b[0][0], M*N, 1.0 / (M*N));
    cout << improved_sum(b) << endl;
    return 0;
}

Writing sum.cpp


In [2]:
!g++ -Wall -O3 -g sum.cpp -o sum


In [3]:
!apt-get install valgrind

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  gdb libbabeltrace1 libc6-dbg libdebuginfod-common libdebuginfod1 libipt2
  libsource-highlight-common libsource-highlight4v5
Suggested packages:
  gdb-doc gdbserver valgrind-dbg valgrind-mpi kcachegrind alleyoop valkyrie
The following NEW packages will be installed:
  gdb libbabeltrace1 libc6-dbg libdebuginfod-common libdebuginfod1 libipt2
  libsource-highlight-common libsource-highlight4v5 valgrind
0 upgraded, 9 newly installed, 0 to remove and 18 not upgraded.
Need to get 32.4 MB of archives.
After this operation, 111 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libdebuginfod-common all 0.186-1build1 [7,878 B]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libbabeltrace1 amd64 1.5.8-2build1 [160 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libdebuginfod1 amd6

In [4]:
!valgrind --tool=callgrind ./sum

==1801== Callgrind, a call-graph generating cache profiler
==1801== Copyright (C) 2002-2017, and GNU GPL'd, by Josef Weidendorfer et al.
==1801== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==1801== Command: ./sum
==1801== 
==1801== For interactive control, run 'callgrind_control -h'.
1
1
==1801== 
==1801== Events    : Ir
==1801== Collected : 50580996
==1801== 
==1801== I   refs:      50,580,996


In [5]:
!ls


callgrind.out.1801  sample_data  sum  sum.cpp


In [6]:
!callgrind_annotate callgrind.out.1801 sum.cpp

--------------------------------------------------------------------------------
Profile data file 'callgrind.out.1801' (creator: callgrind-3.18.1)
--------------------------------------------------------------------------------
I1 cache: 
D1 cache: 
LL cache: 
Timerange: Basic block 0 - 10864277
Trigger: Program termination
Profiled target:  ./sum (PID 1801, part 1)
Events recorded:  Ir
Events shown:     Ir
Event sort order: Ir
Thresholds:       99
Include dirs:     
User annotated:   sum.cpp
Auto-annotation:  on

--------------------------------------------------------------------------------
Ir                  
--------------------------------------------------------------------------------
50,580,996 (100.0%)  PROGRAM TOTALS

--------------------------------------------------------------------------------
Ir                   file:function
--------------------------------------------------------------------------------
31,475,723 (62.23%)  sum.cpp:main [/content/sum]
16,777,220 (3

In [8]:
%%writefile profile.cpp
#include<iostream>
#include<algorithm>
#include <cmath>
#include <chrono>
using namespace std;

void heavyCalculation() {
    for (int i = 0; i < 100000; ++i) {
        double result = std::sqrt(static_cast<double>(i));
    }
}

void intermediateFunction() {
    for (int i = 0; i < 1000; ++i) {
        heavyCalculation();
    }
}

void mainFunction() {
    for (int i = 0; i < 5; ++i) {
        intermediateFunction();
    }
}

int main() {
    auto start = std::chrono::high_resolution_clock::now();

    mainFunction();

    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);

    std::cout << "Time taken: " << duration.count() << " milliseconds" << std::endl;

    return 0;
}


Overwriting profile.cpp


In [9]:
!apt-get install graphviz

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
graphviz is already the newest version (2.42.2-6).
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.


In [10]:
!pip install gprof2dot

Collecting gprof2dot
  Downloading gprof2dot-2022.7.29-py2.py3-none-any.whl (34 kB)
Installing collected packages: gprof2dot
Successfully installed gprof2dot-2022.7.29


In [11]:
!g++ -Wall -O0 -g profile.cpp -o profile

[01m[Kprofile.cpp:[m[K In function ‘[01m[Kvoid heavyCalculation()[m[K’:
    9 |         double [01;35m[Kresult[m[K = std::sqrt(static_cast<double>(i));
      |                [01;35m[K^~~~~~[m[K


In [12]:
!valgrind --tool=callgrind ./profile

==7341== Callgrind, a call-graph generating cache profiler
==7341== Copyright (C) 2002-2017, and GNU GPL'd, by Josef Weidendorfer et al.
==7341== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==7341== Command: ./profile
==7341== 
==7341== For interactive control, run 'callgrind_control -h'.
Time taken: 134807 milliseconds
==7341== 
==7341== Events    : Ir
==7341== Collected : 10002410653
==7341== 
==7341== I   refs:      10,002,410,653


In [13]:
!ls

callgrind.out.1801  profile	 sample_data  sum.cpp
callgrind.out.7341  profile.cpp  sum


In [14]:
!callgrind_annotate callgrind.out.7341 profile.cpp

--------------------------------------------------------------------------------
Profile data file 'callgrind.out.7341' (creator: callgrind-3.18.1)
--------------------------------------------------------------------------------
I1 cache: 
D1 cache: 
LL cache: 
Timerange: Basic block 0 - 3000400675
Trigger: Program termination
Profiled target:  ./profile (PID 7341, part 1)
Events recorded:  Ir
Events shown:     Ir
Event sort order: Ir
Thresholds:       99
Include dirs:     
User annotated:   profile.cpp
Auto-annotation:  on

--------------------------------------------------------------------------------
Ir                      
--------------------------------------------------------------------------------
10,002,410,653 (100.0%)  PROGRAM TOTALS

--------------------------------------------------------------------------------
Ir                      file:function
--------------------------------------------------------------------------------
5,000,060,000 (49.99%)  profile.cpp:heavy

In [15]:
!gprof2dot  -n0.1 -f callgrind callgrind.out.7341 | dot -Tsvg -o output.svg

In [None]:
!g++ -g euclides-ingenuo.cpp -o euclides-ingenuo

In [None]:
!valgrind --tool=callgrind ./euclides-ingenuo

==53904== Callgrind, a call-graph generating cache profiler
==53904== Copyright (C) 2002-2017, and GNU GPL'd, by Josef Weidendorfer et al.
==53904== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info
==53904== Command: ./euclides-ingenuo
==53904== 
==53904== For interactive control, run 'callgrind_control -h'.
==53904== 
==53904== Process terminating with default action of signal 2 (SIGINT)
==53904==    at 0x4BB9992: read (read.c:26)
==53904==    by 0x4B31CB5: _IO_file_underflow@@GLIBC_2.2.5 (fileops.c:516)
==53904==    by 0x4B32E15: _IO_default_uflow (genops.c:362)
==53904==    by 0x496B8C0: __gnu_cxx::stdio_sync_filebuf<char, std::char_traits<char> >::underflow() (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==53904==    by 0x49799D5: std::istream::sentry::sentry(std::istream&, bool) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==53904==    by 0x4979C31: std::istream::operator>>(int&) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.30)
==53904==    by 0x1095